Inspection of "Merge branch 'release/1.9.0'" - xpressengine/xe-core - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( b130b6...8a2f54 )

by gyeong-won

created 2017-11-30 04:09 UTC

Status

Doc Comments +5 added lines patch added patch discarded remove patch

@@ -31,6 +31,9 @@  discard block
 block discarded – undo
 
     /**
      * iconv wrapper which mutes errors and works around bugs.
+     * @param string $in
+     * @param string $out
+     * @param string $text
      */
     public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
         $code = self::testIconvTruncateBug();
@@ -332,6 +335,7 @@  discard block
 block discarded – undo
 
     /**
      * Converts a string to UTF-8 based on configuration.
+     * @param HTMLPurifier_Context $context
      */
     public static function convertToUTF8($str, $config, $context) {
         $encoding = $config->get('Core.Encoding');
@@ -362,6 +366,7 @@  discard block
 block discarded – undo
      * Converts a string from UTF-8 based on configuration.
      * @note Currently, this is a lossy conversion, with unexpressable
      *       characters being omitted.
+     * @param HTMLPurifier_Context $context
      */
     public static function convertFromUTF8($str, $config, $context) {
         $encoding = $config->get('Core.Encoding');

Please login to merge, or discard this patch.

Indentation +526 added lines, -526 removed lines patch added patch discarded remove patch

@@ -7,532 +7,532 @@
 block discarded – undo
 class HTMLPurifier_Encoder
 {
 
-    /**
-     * Constructor throws fatal error if you attempt to instantiate class
-     */
-    private function __construct() {
-        trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
-    }
-
-    /**
-     * Error-handler that mutes errors, alternative to shut-up operator.
-     */
-    public static function muteErrorHandler() {}
-
-    /**
-     * iconv wrapper which mutes errors, but doesn't work around bugs.
-     */
-    public static function unsafeIconv($in, $out, $text) {
-        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
-        $r = iconv($in, $out, $text);
-        restore_error_handler();
-        return $r;
-    }
-
-    /**
-     * iconv wrapper which mutes errors and works around bugs.
-     */
-    public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
-        $code = self::testIconvTruncateBug();
-        if ($code == self::ICONV_OK) {
-            return self::unsafeIconv($in, $out, $text);
-        } elseif ($code == self::ICONV_TRUNCATES) {
-            // we can only work around this if the input character set
-            // is utf-8
-            if ($in == 'utf-8') {
-                if ($max_chunk_size < 4) {
-                    trigger_error('max_chunk_size is too small', E_USER_WARNING);
-                    return false;
-                }
-                // split into 8000 byte chunks, but be careful to handle
-                // multibyte boundaries properly
-                if (($c = strlen($text)) <= $max_chunk_size) {
-                    return self::unsafeIconv($in, $out, $text);
-                }
-                $r = '';
-                $i = 0;
-                while (true) {
-                    if ($i + $max_chunk_size >= $c) {
-                        $r .= self::unsafeIconv($in, $out, substr($text, $i));
-                        break;
-                    }
-                    // wibble the boundary
-                    if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
-                        $chunk_size = $max_chunk_size;
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
-                        $chunk_size = $max_chunk_size - 1;
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
-                        $chunk_size = $max_chunk_size - 2;
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
-                        $chunk_size = $max_chunk_size - 3;
-                    } else {
-                        return false; // rather confusing UTF-8...
-                    }
-                    $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
-                    $r .= self::unsafeIconv($in, $out, $chunk);
-                    $i += $chunk_size;
-                }
-                return $r;
-            } else {
-                return false;
-            }
-        } else {
-            return false;
-        }
-    }
-
-    /**
-     * Cleans a UTF-8 string for well-formedness and SGML validity
-     *
-     * It will parse according to UTF-8 and return a valid UTF8 string, with
-     * non-SGML codepoints excluded.
-     *
-     * @note Just for reference, the non-SGML code points are 0 to 31 and
-     *       127 to 159, inclusive.  However, we allow code points 9, 10
-     *       and 13, which are the tab, line feed and carriage return
-     *       respectively. 128 and above the code points map to multibyte
-     *       UTF-8 representations.
-     *
-     * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
-     *       hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
-     *       LGPL license.  Notes on what changed are inside, but in general,
-     *       the original code transformed UTF-8 text into an array of integer
-     *       Unicode codepoints. Understandably, transforming that back to
-     *       a string would be somewhat expensive, so the function was modded to
-     *       directly operate on the string.  However, this discourages code
-     *       reuse, and the logic enumerated here would be useful for any
-     *       function that needs to be able to understand UTF-8 characters.
-     *       As of right now, only smart lossless character encoding converters
-     *       would need that, and I'm probably not going to implement them.
-     *       Once again, PHP 6 should solve all our problems.
-     */
-    public static function cleanUTF8($str, $force_php = false) {
-
-        // UTF-8 validity is checked since PHP 4.3.5
-        // This is an optimization: if the string is already valid UTF-8, no
-        // need to do PHP stuff. 99% of the time, this will be the case.
-        // The regexp matches the XML char production, as well as well as excluding
-        // non-SGML codepoints U+007F to U+009F
-        if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
-            return $str;
-        }
-
-        $mState = 0; // cached expected number of octets after the current octet
-                     // until the beginning of the next UTF8 character sequence
-        $mUcs4  = 0; // cached Unicode character
-        $mBytes = 1; // cached expected number of octets in the current sequence
-
-        // original code involved an $out that was an array of Unicode
-        // codepoints.  Instead of having to convert back into UTF-8, we've
-        // decided to directly append valid UTF-8 characters onto a string
-        // $out once they're done.  $char accumulates raw bytes, while $mUcs4
-        // turns into the Unicode code point, so there's some redundancy.
-
-        $out = '';
-        $char = '';
-
-        $len = strlen($str);
-        for($i = 0; $i < $len; $i++) {
-            $in = ord($str{$i});
-            $char .= $str[$i]; // append byte to char
-            if (0 == $mState) {
-                // When mState is zero we expect either a US-ASCII character
-                // or a multi-octet sequence.
-                if (0 == (0x80 & ($in))) {
-                    // US-ASCII, pass straight through.
-                    if (($in <= 31 || $in == 127) &&
-                        !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
-                    ) {
-                        // control characters, remove
-                    } else {
-                        $out .= $char;
-                    }
-                    // reset
-                    $char = '';
-                    $mBytes = 1;
-                } elseif (0xC0 == (0xE0 & ($in))) {
-                    // First octet of 2 octet sequence
-                    $mUcs4 = ($in);
-                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
-                    $mState = 1;
-                    $mBytes = 2;
-                } elseif (0xE0 == (0xF0 & ($in))) {
-                    // First octet of 3 octet sequence
-                    $mUcs4 = ($in);
-                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
-                    $mState = 2;
-                    $mBytes = 3;
-                } elseif (0xF0 == (0xF8 & ($in))) {
-                    // First octet of 4 octet sequence
-                    $mUcs4 = ($in);
-                    $mUcs4 = ($mUcs4 & 0x07) << 18;
-                    $mState = 3;
-                    $mBytes = 4;
-                } elseif (0xF8 == (0xFC & ($in))) {
-                    // First octet of 5 octet sequence.
-                    //
-                    // This is illegal because the encoded codepoint must be
-                    // either:
-                    // (a) not the shortest form or
-                    // (b) outside the Unicode range of 0-0x10FFFF.
-                    // Rather than trying to resynchronize, we will carry on
-                    // until the end of the sequence and let the later error
-                    // handling code catch it.
-                    $mUcs4 = ($in);
-                    $mUcs4 = ($mUcs4 & 0x03) << 24;
-                    $mState = 4;
-                    $mBytes = 5;
-                } elseif (0xFC == (0xFE & ($in))) {
-                    // First octet of 6 octet sequence, see comments for 5
-                    // octet sequence.
-                    $mUcs4 = ($in);
-                    $mUcs4 = ($mUcs4 & 1) << 30;
-                    $mState = 5;
-                    $mBytes = 6;
-                } else {
-                    // Current octet is neither in the US-ASCII range nor a
-                    // legal first octet of a multi-octet sequence.
-                    $mState = 0;
-                    $mUcs4  = 0;
-                    $mBytes = 1;
-                    $char = '';
-                }
-            } else {
-                // When mState is non-zero, we expect a continuation of the
-                // multi-octet sequence
-                if (0x80 == (0xC0 & ($in))) {
-                    // Legal continuation.
-                    $shift = ($mState - 1) * 6;
-                    $tmp = $in;
-                    $tmp = ($tmp & 0x0000003F) << $shift;
-                    $mUcs4 |= $tmp;
-
-                    if (0 == --$mState) {
-                        // End of the multi-octet sequence. mUcs4 now contains
-                        // the final Unicode codepoint to be output
-
-                        // Check for illegal sequences and codepoints.
-
-                        // From Unicode 3.1, non-shortest form is illegal
-                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
-                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
-                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
-                            (4 < $mBytes) ||
-                            // From Unicode 3.2, surrogate characters = illegal
-                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
-                            // Codepoints outside the Unicode range are illegal
-                            ($mUcs4 > 0x10FFFF)
-                        ) {
-
-                        } elseif (0xFEFF != $mUcs4 && // omit BOM
-                            // check for valid Char unicode codepoints
-                            (
-                                0x9 == $mUcs4 ||
-                                0xA == $mUcs4 ||
-                                0xD == $mUcs4 ||
-                                (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
-                                // 7F-9F is not strictly prohibited by XML,
-                                // but it is non-SGML, and thus we don't allow it
-                                (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
-                                (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
-                            )
-                        ) {
-                            $out .= $char;
-                        }
-                        // initialize UTF8 cache (reset)
-                        $mState = 0;
-                        $mUcs4  = 0;
-                        $mBytes = 1;
-                        $char = '';
-                    }
-                } else {
-                    // ((0xC0 & (*in) != 0x80) && (mState != 0))
-                    // Incomplete multi-octet sequence.
-                    // used to result in complete fail, but we'll reset
-                    $mState = 0;
-                    $mUcs4  = 0;
-                    $mBytes = 1;
-                    $char ='';
-                }
-            }
-        }
-        return $out;
-    }
-
-    /**
-     * Translates a Unicode codepoint into its corresponding UTF-8 character.
-     * @note Based on Feyd's function at
-     *       <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
-     *       which is in public domain.
-     * @note While we're going to do code point parsing anyway, a good
-     *       optimization would be to refuse to translate code points that
-     *       are non-SGML characters.  However, this could lead to duplication.
-     * @note This is very similar to the unichr function in
-     *       maintenance/generate-entity-file.php (although this is superior,
-     *       due to its sanity checks).
-     */
-
-    // +----------+----------+----------+----------+
-    // | 33222222 | 22221111 | 111111   |          |
-    // | 10987654 | 32109876 | 54321098 | 76543210 | bit
-    // +----------+----------+----------+----------+
-    // |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
-    // |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
-    // |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
-    // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
-    // +----------+----------+----------+----------+
-    // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
-    // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
-    // +----------+----------+----------+----------+
-
-    public static function unichr($code) {
-        if($code > 1114111 or $code < 0 or
-          ($code >= 55296 and $code <= 57343) ) {
-            // bits are set outside the "valid" range as defined
-            // by UNICODE 4.1.0
-            return '';
-        }
-
-        $x = $y = $z = $w = 0;
-        if ($code < 128) {
-            // regular ASCII character
-            $x = $code;
-        } else {
-            // set up bits for UTF-8
-            $x = ($code & 63) | 128;
-            if ($code < 2048) {
-                $y = (($code & 2047) >> 6) | 192;
-            } else {
-                $y = (($code & 4032) >> 6) | 128;
-                if($code < 65536) {
-                    $z = (($code >> 12) & 15) | 224;
-                } else {
-                    $z = (($code >> 12) & 63) | 128;
-                    $w = (($code >> 18) & 7)  | 240;
-                }
-            }
-        }
-        // set up the actual character
-        $ret = '';
-        if($w) $ret .= chr($w);
-        if($z) $ret .= chr($z);
-        if($y) $ret .= chr($y);
-        $ret .= chr($x);
-
-        return $ret;
-    }
-
-    public static function iconvAvailable() {
-        static $iconv = null;
-        if ($iconv === null) {
-            $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
-        }
-        return $iconv;
-    }
-
-    /**
-     * Converts a string to UTF-8 based on configuration.
-     */
-    public static function convertToUTF8($str, $config, $context) {
-        $encoding = $config->get('Core.Encoding');
-        if ($encoding === 'utf-8') return $str;
-        static $iconv = null;
-        if ($iconv === null) $iconv = self::iconvAvailable();
-        if ($iconv && !$config->get('Test.ForceNoIconv')) {
-            // unaffected by bugs, since UTF-8 support all characters
-            $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
-            if ($str === false) {
-                // $encoding is not a valid encoding
-                trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
-                return '';
-            }
-            // If the string is bjorked by Shift_JIS or a similar encoding
-            // that doesn't support all of ASCII, convert the naughty
-            // characters to their true byte-wise ASCII/UTF-8 equivalents.
-            $str = strtr($str, self::testEncodingSupportsASCII($encoding));
-            return $str;
-        } elseif ($encoding === 'iso-8859-1') {
-            $str = utf8_encode($str);
-            return $str;
-        }
-        trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
-    }
-
-    /**
-     * Converts a string from UTF-8 based on configuration.
-     * @note Currently, this is a lossy conversion, with unexpressable
-     *       characters being omitted.
-     */
-    public static function convertFromUTF8($str, $config, $context) {
-        $encoding = $config->get('Core.Encoding');
-        if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
-            $str = self::convertToASCIIDumbLossless($str);
-        }
-        if ($encoding === 'utf-8') return $str;
-        static $iconv = null;
-        if ($iconv === null) $iconv = self::iconvAvailable();
-        if ($iconv && !$config->get('Test.ForceNoIconv')) {
-            // Undo our previous fix in convertToUTF8, otherwise iconv will barf
-            $ascii_fix = self::testEncodingSupportsASCII($encoding);
-            if (!$escape && !empty($ascii_fix)) {
-                $clear_fix = array();
-                foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
-                $str = strtr($str, $clear_fix);
-            }
-            $str = strtr($str, array_flip($ascii_fix));
-            // Normal stuff
-            $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
-            return $str;
-        } elseif ($encoding === 'iso-8859-1') {
-            $str = utf8_decode($str);
-            return $str;
-        }
-        trigger_error('Encoding not supported', E_USER_ERROR);
-        // You might be tempted to assume that the ASCII representation
-        // might be OK, however, this is *not* universally true over all
-        // encodings.  So we take the conservative route here, rather
-        // than forcibly turn on %Core.EscapeNonASCIICharacters
-    }
-
-    /**
-     * Lossless (character-wise) conversion of HTML to ASCII
-     * @param $str UTF-8 string to be converted to ASCII
-     * @returns ASCII encoded string with non-ASCII character entity-ized
-     * @warning Adapted from MediaWiki, claiming fair use: this is a common
-     *       algorithm. If you disagree with this license fudgery,
-     *       implement it yourself.
-     * @note Uses decimal numeric entities since they are best supported.
-     * @note This is a DUMB function: it has no concept of keeping
-     *       character entities that the projected character encoding
-     *       can allow. We could possibly implement a smart version
-     *       but that would require it to also know which Unicode
-     *       codepoints the charset supported (not an easy task).
-     * @note Sort of with cleanUTF8() but it assumes that $str is
-     *       well-formed UTF-8
-     */
-    public static function convertToASCIIDumbLossless($str) {
-        $bytesleft = 0;
-        $result = '';
-        $working = 0;
-        $len = strlen($str);
-        for( $i = 0; $i < $len; $i++ ) {
-            $bytevalue = ord( $str[$i] );
-            if( $bytevalue <= 0x7F ) { //0xxx xxxx
-                $result .= chr( $bytevalue );
-                $bytesleft = 0;
-            } elseif( $bytevalue <= 0xBF ) { //10xx xxxx
-                $working = $working << 6;
-                $working += ($bytevalue & 0x3F);
-                $bytesleft--;
-                if( $bytesleft <= 0 ) {
-                    $result .= "&#" . $working . ";";
-                }
-            } elseif( $bytevalue <= 0xDF ) { //110x xxxx
-                $working = $bytevalue & 0x1F;
-                $bytesleft = 1;
-            } elseif( $bytevalue <= 0xEF ) { //1110 xxxx
-                $working = $bytevalue & 0x0F;
-                $bytesleft = 2;
-            } else { //1111 0xxx
-                $working = $bytevalue & 0x07;
-                $bytesleft = 3;
-            }
-        }
-        return $result;
-    }
-
-    /** No bugs detected in iconv. */
-    const ICONV_OK = 0;
-
-    /** Iconv truncates output if converting from UTF-8 to another
-     *  character set with //IGNORE, and a non-encodable character is found */
-    const ICONV_TRUNCATES = 1;
-
-    /** Iconv does not support //IGNORE, making it unusable for
-     *  transcoding purposes */
-    const ICONV_UNUSABLE = 2;
-
-    /**
-     * glibc iconv has a known bug where it doesn't handle the magic
-     * //IGNORE stanza correctly.  In particular, rather than ignore
-     * characters, it will return an EILSEQ after consuming some number
-     * of characters, and expect you to restart iconv as if it were
-     * an E2BIG.  Old versions of PHP did not respect the errno, and
-     * returned the fragment, so as a result you would see iconv
-     * mysteriously truncating output. We can work around this by
-     * manually chopping our input into segments of about 8000
-     * characters, as long as PHP ignores the error code.  If PHP starts
-     * paying attention to the error code, iconv becomes unusable.
-     *
-     * @returns Error code indicating severity of bug.
-     */
-    public static function testIconvTruncateBug() {
-        static $code = null;
-        if ($code === null) {
-            // better not use iconv, otherwise infinite loop!
-            $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
-            if ($r === false) {
-                $code = self::ICONV_UNUSABLE;
-            } elseif (($c = strlen($r)) < 9000) {
-                $code = self::ICONV_TRUNCATES;
-            } elseif ($c > 9000) {
-                trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
-            } else {
-                $code = self::ICONV_OK;
-            }
-        }
-        return $code;
-    }
-
-    /**
-     * This expensive function tests whether or not a given character
-     * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
-     * fail this test, and require special processing. Variable width
-     * encodings shouldn't ever fail.
-     *
-     * @param string $encoding Encoding name to test, as per iconv format
-     * @param bool $bypass Whether or not to bypass the precompiled arrays.
-     * @return Array of UTF-8 characters to their corresponding ASCII,
-     *      which can be used to "undo" any overzealous iconv action.
-     */
-    public static function testEncodingSupportsASCII($encoding, $bypass = false) {
-        // All calls to iconv here are unsafe, proof by case analysis:
-        // If ICONV_OK, no difference.
-        // If ICONV_TRUNCATE, all calls involve one character inputs,
-        // so bug is not triggered.
-        // If ICONV_UNUSABLE, this call is irrelevant
-        static $encodings = array();
-        if (!$bypass) {
-            if (isset($encodings[$encoding])) return $encodings[$encoding];
-            $lenc = strtolower($encoding);
-            switch ($lenc) {
-                case 'shift_jis':
-                    return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
-                case 'johab':
-                    return array("\xE2\x82\xA9" => '\\');
-            }
-            if (strpos($lenc, 'iso-8859-') === 0) return array();
-        }
-        $ret = array();
-        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
-        for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
-            $c = chr($i); // UTF-8 char
-            $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
-            if (
-                $r === '' ||
-                // This line is needed for iconv implementations that do not
-                // omit characters that do not exist in the target character set
-                ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
-            ) {
-                // Reverse engineer: what's the UTF-8 equiv of this byte
-                // sequence? This assumes that there's no variable width
-                // encoding that doesn't support ASCII.
-                $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
-            }
-        }
-        $encodings[$encoding] = $ret;
-        return $ret;
-    }
+	/**
+	 * Constructor throws fatal error if you attempt to instantiate class
+	 */
+	private function __construct() {
+		trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
+	}
+
+	/**
+	 * Error-handler that mutes errors, alternative to shut-up operator.
+	 */
+	public static function muteErrorHandler() {}
+
+	/**
+	 * iconv wrapper which mutes errors, but doesn't work around bugs.
+	 */
+	public static function unsafeIconv($in, $out, $text) {
+		set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
+		$r = iconv($in, $out, $text);
+		restore_error_handler();
+		return $r;
+	}
+
+	/**
+	 * iconv wrapper which mutes errors and works around bugs.
+	 */
+	public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
+		$code = self::testIconvTruncateBug();
+		if ($code == self::ICONV_OK) {
+			return self::unsafeIconv($in, $out, $text);
+		} elseif ($code == self::ICONV_TRUNCATES) {
+			// we can only work around this if the input character set
+			// is utf-8
+			if ($in == 'utf-8') {
+				if ($max_chunk_size < 4) {
+					trigger_error('max_chunk_size is too small', E_USER_WARNING);
+					return false;
+				}
+				// split into 8000 byte chunks, but be careful to handle
+				// multibyte boundaries properly
+				if (($c = strlen($text)) <= $max_chunk_size) {
+					return self::unsafeIconv($in, $out, $text);
+				}
+				$r = '';
+				$i = 0;
+				while (true) {
+					if ($i + $max_chunk_size >= $c) {
+						$r .= self::unsafeIconv($in, $out, substr($text, $i));
+						break;
+					}
+					// wibble the boundary
+					if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
+						$chunk_size = $max_chunk_size;
+					} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
+						$chunk_size = $max_chunk_size - 1;
+					} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
+						$chunk_size = $max_chunk_size - 2;
+					} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
+						$chunk_size = $max_chunk_size - 3;
+					} else {
+						return false; // rather confusing UTF-8...
+					}
+					$chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
+					$r .= self::unsafeIconv($in, $out, $chunk);
+					$i += $chunk_size;
+				}
+				return $r;
+			} else {
+				return false;
+			}
+		} else {
+			return false;
+		}
+	}
+
+	/**
+	 * Cleans a UTF-8 string for well-formedness and SGML validity
+	 *
+	 * It will parse according to UTF-8 and return a valid UTF8 string, with
+	 * non-SGML codepoints excluded.
+	 *
+	 * @note Just for reference, the non-SGML code points are 0 to 31 and
+	 *       127 to 159, inclusive.  However, we allow code points 9, 10
+	 *       and 13, which are the tab, line feed and carriage return
+	 *       respectively. 128 and above the code points map to multibyte
+	 *       UTF-8 representations.
+	 *
+	 * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
+	 *       hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
+	 *       LGPL license.  Notes on what changed are inside, but in general,
+	 *       the original code transformed UTF-8 text into an array of integer
+	 *       Unicode codepoints. Understandably, transforming that back to
+	 *       a string would be somewhat expensive, so the function was modded to
+	 *       directly operate on the string.  However, this discourages code
+	 *       reuse, and the logic enumerated here would be useful for any
+	 *       function that needs to be able to understand UTF-8 characters.
+	 *       As of right now, only smart lossless character encoding converters
+	 *       would need that, and I'm probably not going to implement them.
+	 *       Once again, PHP 6 should solve all our problems.
+	 */
+	public static function cleanUTF8($str, $force_php = false) {
+
+		// UTF-8 validity is checked since PHP 4.3.5
+		// This is an optimization: if the string is already valid UTF-8, no
+		// need to do PHP stuff. 99% of the time, this will be the case.
+		// The regexp matches the XML char production, as well as well as excluding
+		// non-SGML codepoints U+007F to U+009F
+		if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
+			return $str;
+		}
+
+		$mState = 0; // cached expected number of octets after the current octet
+					 // until the beginning of the next UTF8 character sequence
+		$mUcs4  = 0; // cached Unicode character
+		$mBytes = 1; // cached expected number of octets in the current sequence
+
+		// original code involved an $out that was an array of Unicode
+		// codepoints.  Instead of having to convert back into UTF-8, we've
+		// decided to directly append valid UTF-8 characters onto a string
+		// $out once they're done.  $char accumulates raw bytes, while $mUcs4
+		// turns into the Unicode code point, so there's some redundancy.
+
+		$out = '';
+		$char = '';
+
+		$len = strlen($str);
+		for($i = 0; $i < $len; $i++) {
+			$in = ord($str{$i});
+			$char .= $str[$i]; // append byte to char
+			if (0 == $mState) {
+				// When mState is zero we expect either a US-ASCII character
+				// or a multi-octet sequence.
+				if (0 == (0x80 & ($in))) {
+					// US-ASCII, pass straight through.
+					if (($in <= 31 || $in == 127) &&
+						!($in == 9 || $in == 13 || $in == 10) // save \r\t\n
+					) {
+						// control characters, remove
+					} else {
+						$out .= $char;
+					}
+					// reset
+					$char = '';
+					$mBytes = 1;
+				} elseif (0xC0 == (0xE0 & ($in))) {
+					// First octet of 2 octet sequence
+					$mUcs4 = ($in);
+					$mUcs4 = ($mUcs4 & 0x1F) << 6;
+					$mState = 1;
+					$mBytes = 2;
+				} elseif (0xE0 == (0xF0 & ($in))) {
+					// First octet of 3 octet sequence
+					$mUcs4 = ($in);
+					$mUcs4 = ($mUcs4 & 0x0F) << 12;
+					$mState = 2;
+					$mBytes = 3;
+				} elseif (0xF0 == (0xF8 & ($in))) {
+					// First octet of 4 octet sequence
+					$mUcs4 = ($in);
+					$mUcs4 = ($mUcs4 & 0x07) << 18;
+					$mState = 3;
+					$mBytes = 4;
+				} elseif (0xF8 == (0xFC & ($in))) {
+					// First octet of 5 octet sequence.
+					//
+					// This is illegal because the encoded codepoint must be
+					// either:
+					// (a) not the shortest form or
+					// (b) outside the Unicode range of 0-0x10FFFF.
+					// Rather than trying to resynchronize, we will carry on
+					// until the end of the sequence and let the later error
+					// handling code catch it.
+					$mUcs4 = ($in);
+					$mUcs4 = ($mUcs4 & 0x03) << 24;
+					$mState = 4;
+					$mBytes = 5;
+				} elseif (0xFC == (0xFE & ($in))) {
+					// First octet of 6 octet sequence, see comments for 5
+					// octet sequence.
+					$mUcs4 = ($in);
+					$mUcs4 = ($mUcs4 & 1) << 30;
+					$mState = 5;
+					$mBytes = 6;
+				} else {
+					// Current octet is neither in the US-ASCII range nor a
+					// legal first octet of a multi-octet sequence.
+					$mState = 0;
+					$mUcs4  = 0;
+					$mBytes = 1;
+					$char = '';
+				}
+			} else {
+				// When mState is non-zero, we expect a continuation of the
+				// multi-octet sequence
+				if (0x80 == (0xC0 & ($in))) {
+					// Legal continuation.
+					$shift = ($mState - 1) * 6;
+					$tmp = $in;
+					$tmp = ($tmp & 0x0000003F) << $shift;
+					$mUcs4 |= $tmp;
+
+					if (0 == --$mState) {
+						// End of the multi-octet sequence. mUcs4 now contains
+						// the final Unicode codepoint to be output
+
+						// Check for illegal sequences and codepoints.
+
+						// From Unicode 3.1, non-shortest form is illegal
+						if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
+							((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
+							((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
+							(4 < $mBytes) ||
+							// From Unicode 3.2, surrogate characters = illegal
+							(($mUcs4 & 0xFFFFF800) == 0xD800) ||
+							// Codepoints outside the Unicode range are illegal
+							($mUcs4 > 0x10FFFF)
+						) {
+
+						} elseif (0xFEFF != $mUcs4 && // omit BOM
+							// check for valid Char unicode codepoints
+							(
+								0x9 == $mUcs4 ||
+								0xA == $mUcs4 ||
+								0xD == $mUcs4 ||
+								(0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
+								// 7F-9F is not strictly prohibited by XML,
+								// but it is non-SGML, and thus we don't allow it
+								(0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
+								(0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
+							)
+						) {
+							$out .= $char;
+						}
+						// initialize UTF8 cache (reset)
+						$mState = 0;
+						$mUcs4  = 0;
+						$mBytes = 1;
+						$char = '';
+					}
+				} else {
+					// ((0xC0 & (*in) != 0x80) && (mState != 0))
+					// Incomplete multi-octet sequence.
+					// used to result in complete fail, but we'll reset
+					$mState = 0;
+					$mUcs4  = 0;
+					$mBytes = 1;
+					$char ='';
+				}
+			}
+		}
+		return $out;
+	}
+
+	/**
+	 * Translates a Unicode codepoint into its corresponding UTF-8 character.
+	 * @note Based on Feyd's function at
+	 *       <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
+	 *       which is in public domain.
+	 * @note While we're going to do code point parsing anyway, a good
+	 *       optimization would be to refuse to translate code points that
+	 *       are non-SGML characters.  However, this could lead to duplication.
+	 * @note This is very similar to the unichr function in
+	 *       maintenance/generate-entity-file.php (although this is superior,
+	 *       due to its sanity checks).
+	 * @param int $code Unicode code point to encode.
+	 * @return string The 1 to 4 byte UTF-8 sequence for $code, or '' when
+	 *         $code is negative, greater than 0x10FFFF, or a surrogate
+	 *         (0xD800..0xDFFF).
+	 */
+
+	// +----------+----------+----------+----------+
+	// | 33222222 | 22221111 | 111111   |          |
+	// | 10987654 | 32109876 | 54321098 | 76543210 | bit
+	// +----------+----------+----------+----------+
+	// |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
+	// |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
+	// |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
+	// | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
+	// +----------+----------+----------+----------+
+	// | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
+	// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
+	// +----------+----------+----------+----------+
+
+	public static function unichr($code) {
+		if($code > 1114111 or $code < 0 or // 1114111 = 0x10FFFF
+		  ($code >= 55296 and $code <= 57343) ) { // 0xD800..0xDFFF, the surrogate range
+			// bits are set outside the "valid" range as defined
+			// by UNICODE 4.1.0
+			return '';
+		}
+
+		$x = $y = $z = $w = 0; // $x is the last byte; $y, $z, $w stay 0 when unused
+		if ($code < 128) {
+			// regular ASCII character
+			$x = $code;
+		} else {
+			// set up bits for UTF-8
+			$x = ($code & 63) | 128; // low 6 bits in a 10xxxxxx continuation byte
+			if ($code < 2048) {
+				$y = (($code & 2047) >> 6) | 192; // 110yyyyy lead byte of a 2-byte sequence
+			} else {
+				$y = (($code & 4032) >> 6) | 128; // 4032 = 0xFC0: bits 6-11 as a continuation byte
+				if($code < 65536) {
+					$z = (($code >> 12) & 15) | 224; // 1110zzzz lead byte of a 3-byte sequence
+				} else {
+					$z = (($code >> 12) & 63) | 128; // bits 12-17 as a continuation byte
+					$w = (($code >> 18) & 7)  | 240; // 11110www lead byte of a 4-byte sequence
+				}
+			}
+		}
+		// set up the actual character
+		$ret = ''; // bytes are appended most significant first; zero placeholders are skipped
+		if($w) $ret .= chr($w);
+		if($z) $ret .= chr($z);
+		if($y) $ret .= chr($y);
+		$ret .= chr($x);
+
+		return $ret;
+	}
+
+	public static function iconvAvailable() { // true when iconv() exists and is not flagged ICONV_UNUSABLE
+		static $iconv = null; // result is computed on the first call and reused afterwards
+		if ($iconv === null) {
+			$iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE; // probe only runs if iconv() exists
+		}
+		return $iconv;
+	}
+
+	/**
+	 * Converts a string to UTF-8 based on configuration.
+	 *
+	 * @param string $str String to convert; it is handed to iconv as being
+	 *        in the encoding named by 'Core.Encoding'.
+	 * @param object $config Configuration object; read through get() for
+	 *        'Core.Encoding' and 'Test.ForceNoIconv'.
+	 * @param mixed $context Not read by this method.
+	 * @return string|null $str unchanged when the encoding is exactly
+	 *         'utf-8'; otherwise the converted string, or '' after the
+	 *         "Invalid encoding" error. Nothing is returned after the
+	 *         "Encoding not supported" error if execution continues.
+	 */
+	public static function convertToUTF8($str, $config, $context) {
+		$encoding = $config->get('Core.Encoding');
+		if ($encoding === 'utf-8') return $str; // strict, case-sensitive match
+		static $iconv = null; // per-method cache of iconvAvailable()
+		if ($iconv === null) $iconv = self::iconvAvailable();
+		if ($iconv && !$config->get('Test.ForceNoIconv')) {
+			// unaffected by bugs, since UTF-8 supports all characters
+			$str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
+			if ($str === false) {
+				// $encoding is not a valid encoding
+				trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
+				return '';
+			}
+			// If the string is bjorked by Shift_JIS or a similar encoding
+			// that doesn't support all of ASCII, convert the naughty
+			// characters to their true byte-wise ASCII/UTF-8 equivalents.
+			$str = strtr($str, self::testEncodingSupportsASCII($encoding));
+			return $str;
+		} elseif ($encoding === 'iso-8859-1') { // only fallback available without iconv
+			$str = utf8_encode($str); // NOTE(review): utf8_encode() is deprecated as of PHP 8.2
+			return $str;
+		}
+		trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
+	}
+
+	/**
+	 * Converts a string from UTF-8 based on configuration.
+	 * @note Currently, this is a lossy conversion, with unexpressable
+	 *       characters being omitted.
+	 *
+	 * @param string $str UTF-8 string to convert.
+	 * @param object $config Configuration object; read through get() for
+	 *        'Core.Encoding', 'Core.EscapeNonASCIICharacters' and
+	 *        'Test.ForceNoIconv'.
+	 * @param mixed $context Not read by this method.
+	 * @return string|null The string in the configured encoding. When
+	 *         'Core.EscapeNonASCIICharacters' is truthy the string is first
+	 *         passed through convertToASCIIDumbLossless(). Nothing is
+	 *         returned after the "Encoding not supported" error if
+	 *         execution continues.
+	 */
+	public static function convertFromUTF8($str, $config, $context) {
+		$encoding = $config->get('Core.Encoding');
+		if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { // assignment is intentional; $escape is read again below
+			$str = self::convertToASCIIDumbLossless($str);
+		}
+		if ($encoding === 'utf-8') return $str; // strict, case-sensitive match
+		static $iconv = null; // per-method cache of iconvAvailable()
+		if ($iconv === null) $iconv = self::iconvAvailable();
+		if ($iconv && !$config->get('Test.ForceNoIconv')) {
+			// Undo our previous fix in convertToUTF8, otherwise iconv will barf
+			$ascii_fix = self::testEncodingSupportsASCII($encoding); // NOTE(review): can be false for an encoding iconv rejects
+			if (!$escape && !empty($ascii_fix)) {
+				$clear_fix = array();
+				foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; // delete the UTF-8 characters that are keys of $ascii_fix
+				$str = strtr($str, $clear_fix);
+			}
+			$str = strtr($str, array_flip($ascii_fix)); // map ASCII chars back to the UTF-8 keys; NOTE(review): array_flip() needs an array, not false
+			// Normal stuff
+			$str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
+			return $str;
+		} elseif ($encoding === 'iso-8859-1') { // only fallback available without iconv
+			$str = utf8_decode($str); // NOTE(review): utf8_decode() is deprecated as of PHP 8.2
+			return $str;
+		}
+		trigger_error('Encoding not supported', E_USER_ERROR);
+		// You might be tempted to assume that the ASCII representation
+		// might be OK, however, this is *not* universally true over all
+		// encodings.  So we take the conservative route here, rather
+		// than forcibly turn on %Core.EscapeNonASCIICharacters
+	}
+
+	/**
+	 * Lossless (character-wise) conversion of HTML to ASCII
+	 * @param string $str UTF-8 string to be converted to ASCII
+	 * @return string ASCII encoded string with non-ASCII characters
+	 *         replaced by decimal numeric entities
+	 * @warning Adapted from MediaWiki, claiming fair use: this is a common
+	 *       algorithm. If you disagree with this license fudgery,
+	 *       implement it yourself.
+	 * @note Uses decimal numeric entities since they are best supported.
+	 * @note This is a DUMB function: it has no concept of keeping
+	 *       character entities that the projected character encoding
+	 *       can allow. We could possibly implement a smart version
+	 *       but that would require it to also know which Unicode
+	 *       codepoints the charset supported (not an easy task).
+	 * @note Sort of like cleanUTF8(), but it assumes that $str is
+	 *       well-formed UTF-8; no validation of the byte sequence is
+	 *       performed here.
+	 */
+	public static function convertToASCIIDumbLossless($str) {
+		$bytesleft = 0; // continuation bytes still expected for the current character
+		$result = '';
+		$working = 0; // code point accumulated so far
+		$len = strlen($str);
+		for( $i = 0; $i < $len; $i++ ) {
+			$bytevalue = ord( $str[$i] );
+			if( $bytevalue <= 0x7F ) { //0xxx xxxx
+				$result .= chr( $bytevalue ); // ASCII bytes are copied through unchanged
+				$bytesleft = 0;
+			} elseif( $bytevalue <= 0xBF ) { //10xx xxxx
+				$working = $working << 6; // make room for the next 6 payload bits
+				$working += ($bytevalue & 0x3F);
+				$bytesleft--;
+				if( $bytesleft <= 0 ) {
+					$result .= "&#" . $working . ";"; // character complete: emit it as a decimal entity
+				}
+			} elseif( $bytevalue <= 0xDF ) { //110x xxxx
+				$working = $bytevalue & 0x1F; // lead byte of a 2-byte sequence
+				$bytesleft = 1;
+			} elseif( $bytevalue <= 0xEF ) { //1110 xxxx
+				$working = $bytevalue & 0x0F; // lead byte of a 3-byte sequence
+				$bytesleft = 2;
+			} else { //1111 0xxx
+				$working = $bytevalue & 0x07; // any byte above 0xEF is treated as a 4-byte lead
+				$bytesleft = 3;
+			}
+		}
+		return $result;
+	}
+
+	/** No bugs detected in iconv. */
+	const ICONV_OK = 0;
+
+	/** Iconv truncates output if converting from UTF-8 to another
+	 *  character set with //IGNORE, and a non-encodable character is found */
+	const ICONV_TRUNCATES = 1;
+
+	/** Iconv does not support //IGNORE, making it unusable for
+	 *  transcoding purposes */
+	const ICONV_UNUSABLE = 2;
+
+	/**
+	 * glibc iconv has a known bug where it doesn't handle the magic
+	 * //IGNORE stanza correctly.  In particular, rather than ignore
+	 * characters, it will return an EILSEQ after consuming some number
+	 * of characters, and expect you to restart iconv as if it were
+	 * an E2BIG.  Old versions of PHP did not respect the errno, and
+	 * returned the fragment, so as a result you would see iconv
+	 * mysteriously truncating output. We can work around this by
+	 * manually chopping our input into segments of about 8000
+	 * characters, as long as PHP ignores the error code.  If PHP starts
+	 * paying attention to the error code, iconv becomes unusable.
+	 *
+	 * The probe runs once; its result is kept in a static variable and
+	 * returned directly on later calls.
+	 *
+	 * @return int|null Error code indicating severity of bug: ICONV_OK,
+	 *         ICONV_TRUNCATES or ICONV_UNUSABLE. The value stays null if
+	 *         the "extremely buggy" branch is hit and execution continues.
+	 */
+	public static function testIconvTruncateBug() {
+		static $code = null;
+		if ($code === null) {
+			// better not use iconv, otherwise infinite loop!
+			$r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000)); // U+03B1 (not ASCII) followed by 9000 'a' bytes
+			if ($r === false) {
+				$code = self::ICONV_UNUSABLE;
+			} elseif (($c = strlen($r)) < 9000) { // fewer than the 9000 'a' bytes came back
+				$code = self::ICONV_TRUNCATES;
+			} elseif ($c > 9000) { // more output than the ASCII part of the input
+				trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
+			} else {
+				$code = self::ICONV_OK; // exactly 9000 bytes: the non-ASCII character was dropped cleanly
+			}
+		}
+		return $code;
+	}
+
+	/**
+	 * This expensive function tests whether or not a given character
+	 * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
+	 * fail this test, and require special processing. Variable width
+	 * encodings shouldn't ever fail.
+	 *
+	 * @param string $encoding Encoding name to test, as per iconv format
+	 * @param bool $bypass Whether or not to bypass the precompiled arrays
+	 *      (and the per-call cache lookup).
+	 * @return array|false Array of UTF-8 characters to their corresponding
+	 *      ASCII, which can be used to "undo" any overzealous iconv action.
+	 *      An empty array means no fix-ups are needed. False is returned
+	 *      when iconv cannot convert 'a' into $encoding.
+	 */
+	public static function testEncodingSupportsASCII($encoding, $bypass = false) {
+		// All calls to iconv here are unsafe, proof by case analysis:
+		// If ICONV_OK, no difference.
+		// If ICONV_TRUNCATES, all calls involve one character inputs,
+		// so bug is not triggered.
+		// If ICONV_UNUSABLE, this call is irrelevant
+		static $encodings = array(); // cache of computed results, keyed by $encoding exactly as passed
+		if (!$bypass) {
+			if (isset($encodings[$encoding])) return $encodings[$encoding];
+			$lenc = strtolower($encoding); // the precompiled lookups below are case-insensitive
+			switch ($lenc) {
+				case 'shift_jis':
+					return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
+				case 'johab':
+					return array("\xE2\x82\xA9" => '\\');
+			}
+			if (strpos($lenc, 'iso-8859-') === 0) return array(); // any name starting with iso-8859- is assumed to need no fix-ups
+		}
+		$ret = array();
+		if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false; // iconv cannot convert into $encoding at all
+		for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
+			$c = chr($i); // UTF-8 char
+			$r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
+			if (
+				$r === '' ||
+				// This line is needed for iconv implementations that do not
+				// omit characters that do not exist in the target character set
+				($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
+			) {
+				// Reverse engineer: what's the UTF-8 equiv of this byte
+				// sequence? This assumes that there's no variable width
+				// encoding that doesn't support ASCII.
+				$ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
+			}
+		}
+		$encodings[$encoding] = $ret; // stored even when $bypass is true
+		return $ret;
+	}
 
 
 }

Please log in to merge, or discard this patch.

Braces +33 added lines, -11 removed lines patch added patch discarded remove patch

@@ -314,9 +314,15 @@  discard block
 block discarded – undo
         }
         // set up the actual character
         $ret = '';
-        if($w) $ret .= chr($w);
-        if($z) $ret .= chr($z);
-        if($y) $ret .= chr($y);
+        if($w) {
+        	$ret .= chr($w);
+        }
+        if($z) {
+        	$ret .= chr($z);
+        }
+        if($y) {
+        	$ret .= chr($y);
+        }
         $ret .= chr($x);
 
         return $ret;
@@ -335,9 +341,13 @@  discard block
 block discarded – undo
      */
     public static function convertToUTF8($str, $config, $context) {
         $encoding = $config->get('Core.Encoding');
-        if ($encoding === 'utf-8') return $str;
+        if ($encoding === 'utf-8') {
+        	return $str;
+        }
         static $iconv = null;
-        if ($iconv === null) $iconv = self::iconvAvailable();
+        if ($iconv === null) {
+        	$iconv = self::iconvAvailable();
+        }
         if ($iconv && !$config->get('Test.ForceNoIconv')) {
             // unaffected by bugs, since UTF-8 support all characters
             $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
@@ -368,15 +378,21 @@  discard block
 block discarded – undo
         if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
             $str = self::convertToASCIIDumbLossless($str);
         }
-        if ($encoding === 'utf-8') return $str;
+        if ($encoding === 'utf-8') {
+        	return $str;
+        }
         static $iconv = null;
-        if ($iconv === null) $iconv = self::iconvAvailable();
+        if ($iconv === null) {
+        	$iconv = self::iconvAvailable();
+        }
         if ($iconv && !$config->get('Test.ForceNoIconv')) {
             // Undo our previous fix in convertToUTF8, otherwise iconv will barf
             $ascii_fix = self::testEncodingSupportsASCII($encoding);
             if (!$escape && !empty($ascii_fix)) {
                 $clear_fix = array();
-                foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
+                foreach ($ascii_fix as $utf8 => $native) {
+                	$clear_fix[$utf8] = '';
+                }
                 $str = strtr($str, $clear_fix);
             }
             $str = strtr($str, array_flip($ascii_fix));
@@ -503,7 +519,9 @@  discard block
 block discarded – undo
         // If ICONV_UNUSABLE, this call is irrelevant
         static $encodings = array();
         if (!$bypass) {
-            if (isset($encodings[$encoding])) return $encodings[$encoding];
+            if (isset($encodings[$encoding])) {
+            	return $encodings[$encoding];
+            }
             $lenc = strtolower($encoding);
             switch ($lenc) {
                 case 'shift_jis':
@@ -511,10 +529,14 @@  discard block
 block discarded – undo
                 case 'johab':
                     return array("\xE2\x82\xA9" => '\\');
             }
-            if (strpos($lenc, 'iso-8859-') === 0) return array();
+            if (strpos($lenc, 'iso-8859-') === 0) {
+            	return array();
+            }
         }
         $ret = array();
-        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
+        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) {
+        	return false;
+        }
         for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
             $c = chr($i); // UTF-8 char
             $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion

Please log in to merge, or discard this patch.

Spacing +21 added lines, -21 removed lines patch added patch discarded remove patch

@@ -132,7 +132,7 @@  discard block
 block discarded – undo
         $char = '';
 
         $len = strlen($str);
-        for($i = 0; $i < $len; $i++) {
+        for ($i = 0; $i < $len; $i++) {
             $in = ord($str{$i});
             $char .= $str[$i]; // append byte to char
             if (0 == $mState) {
@@ -252,7 +252,7 @@  discard block
 block discarded – undo
                     $mState = 0;
                     $mUcs4  = 0;
                     $mBytes = 1;
-                    $char ='';
+                    $char = '';
                 }
             }
         }
@@ -286,8 +286,8 @@  discard block
 block discarded – undo
     // +----------+----------+----------+----------+
 
     public static function unichr($code) {
-        if($code > 1114111 or $code < 0 or
-          ($code >= 55296 and $code <= 57343) ) {
+        if ($code > 1114111 or $code < 0 or
+          ($code >= 55296 and $code <= 57343)) {
             // bits are set outside the "valid" range as defined
             // by UNICODE 4.1.0
             return '';
@@ -304,19 +304,19 @@  discard block
 block discarded – undo
                 $y = (($code & 2047) >> 6) | 192;
             } else {
                 $y = (($code & 4032) >> 6) | 128;
-                if($code < 65536) {
+                if ($code < 65536) {
                     $z = (($code >> 12) & 15) | 224;
                 } else {
                     $z = (($code >> 12) & 63) | 128;
-                    $w = (($code >> 18) & 7)  | 240;
+                    $w = (($code >> 18) & 7) | 240;
                 }
             }
         }
         // set up the actual character
         $ret = '';
-        if($w) $ret .= chr($w);
-        if($z) $ret .= chr($z);
-        if($y) $ret .= chr($y);
+        if ($w) $ret .= chr($w);
+        if ($z) $ret .= chr($z);
+        if ($y) $ret .= chr($y);
         $ret .= chr($x);
 
         return $ret;
@@ -343,7 +343,7 @@  discard block
 block discarded – undo
             $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
             if ($str === false) {
                 // $encoding is not a valid encoding
-                trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
+                trigger_error('Invalid encoding '.$encoding, E_USER_ERROR);
                 return '';
             }
             // If the string is bjorked by Shift_JIS or a similar encoding
@@ -381,7 +381,7 @@  discard block
 block discarded – undo
             }
             $str = strtr($str, array_flip($ascii_fix));
             // Normal stuff
-            $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
+            $str = self::iconv('utf-8', $encoding.'//IGNORE', $str);
             return $str;
         } elseif ($encoding === 'iso-8859-1') {
             $str = utf8_decode($str);
@@ -415,22 +415,22 @@  discard block
 block discarded – undo
         $result = '';
         $working = 0;
         $len = strlen($str);
-        for( $i = 0; $i < $len; $i++ ) {
-            $bytevalue = ord( $str[$i] );
-            if( $bytevalue <= 0x7F ) { //0xxx xxxx
-                $result .= chr( $bytevalue );
+        for ($i = 0; $i < $len; $i++) {
+            $bytevalue = ord($str[$i]);
+            if ($bytevalue <= 0x7F) { //0xxx xxxx
+                $result .= chr($bytevalue);
                 $bytesleft = 0;
-            } elseif( $bytevalue <= 0xBF ) { //10xx xxxx
+            } elseif ($bytevalue <= 0xBF) { //10xx xxxx
                 $working = $working << 6;
                 $working += ($bytevalue & 0x3F);
                 $bytesleft--;
-                if( $bytesleft <= 0 ) {
-                    $result .= "&#" . $working . ";";
+                if ($bytesleft <= 0) {
+                    $result .= "&#".$working.";";
                 }
-            } elseif( $bytevalue <= 0xDF ) { //110x xxxx
+            } elseif ($bytevalue <= 0xDF) { //110x xxxx
                 $working = $bytevalue & 0x1F;
                 $bytesleft = 1;
-            } elseif( $bytevalue <= 0xEF ) { //1110 xxxx
+            } elseif ($bytevalue <= 0xEF) { //1110 xxxx
                 $working = $bytevalue & 0x0F;
                 $bytesleft = 2;
             } else { //1111 0xxx
@@ -470,7 +470,7 @@  discard block
 block discarded – undo
         static $code = null;
         if ($code === null) {
             // better not use iconv, otherwise infinite loop!
-            $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
+            $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1".str_repeat('a', 9000));
             if ($r === false) {
                 $code = self::ICONV_UNUSABLE;
             } elseif (($c = strlen($r)) < 9000) {

Please log in to merge, or discard this patch.

classes/security/htmlpurifier/library/HTMLPurifier/ErrorCollector.php 4 patches

Doc Comments +3 added lines patch added patch discarded remove patch

@@ -25,6 +25,9 @@
 block discarded – undo
 
     protected $lines = array();
 
+    /**
+     * @param HTMLPurifier_Context $context
+     */
     public function __construct($context) {
         $this->locale    =& $context->get('Locale');
         $this->context   = $context;

Please log in to merge, or discard this patch.

Indentation +196 added lines, -196 removed lines patch added patch discarded remove patch

@@ -7,202 +7,202 @@
 block discarded – undo
 class HTMLPurifier_ErrorCollector
 {
 
-    /**
-     * Identifiers for the returned error array. These are purposely numeric
-     * so list() can be used.
-     */
-    const LINENO   = 0;
-    const SEVERITY = 1;
-    const MESSAGE  = 2;
-    const CHILDREN = 3;
-
-    protected $errors;
-    protected $_current;
-    protected $_stacks = array(array());
-    protected $locale;
-    protected $generator;
-    protected $context;
-
-    protected $lines = array();
-
-    public function __construct($context) {
-        $this->locale    =& $context->get('Locale');
-        $this->context   = $context;
-        $this->_current  =& $this->_stacks[0];
-        $this->errors    =& $this->_stacks[0];
-    }
-
-    /**
-     * Sends an error message to the collector for later use
-     * @param $severity int Error severity, PHP error style (don't use E_USER_)
-     * @param $msg string Error message text
-     * @param $subst1 string First substitution for $msg
-     * @param $subst2 string ...
-     */
-    public function send($severity, $msg) {
-
-        $args = array();
-        if (func_num_args() > 2) {
-            $args = func_get_args();
-            array_shift($args);
-            unset($args[0]);
-        }
-
-        $token = $this->context->get('CurrentToken', true);
-        $line  = $token ? $token->line : $this->context->get('CurrentLine', true);
-        $col   = $token ? $token->col  : $this->context->get('CurrentCol',  true);
-        $attr  = $this->context->get('CurrentAttr', true);
-
-        // perform special substitutions, also add custom parameters
-        $subst = array();
-        if (!is_null($token)) {
-            $args['CurrentToken'] = $token;
-        }
-        if (!is_null($attr)) {
-            $subst['$CurrentAttr.Name'] = $attr;
-            if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
-        }
-
-        if (empty($args)) {
-            $msg = $this->locale->getMessage($msg);
-        } else {
-            $msg = $this->locale->formatMessage($msg, $args);
-        }
-
-        if (!empty($subst)) $msg = strtr($msg, $subst);
-
-        // (numerically indexed)
-        $error = array(
-            self::LINENO   => $line,
-            self::SEVERITY => $severity,
-            self::MESSAGE  => $msg,
-            self::CHILDREN => array()
-        );
-        $this->_current[] = $error;
-
-
-        // NEW CODE BELOW ...
-
-        $struct = null;
-        // Top-level errors are either:
-        //  TOKEN type, if $value is set appropriately, or
-        //  "syntax" type, if $value is null
-        $new_struct = new HTMLPurifier_ErrorStruct();
-        $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
-        if ($token) $new_struct->value = clone $token;
-        if (is_int($line) && is_int($col)) {
-            if (isset($this->lines[$line][$col])) {
-                $struct = $this->lines[$line][$col];
-            } else {
-                $struct = $this->lines[$line][$col] = $new_struct;
-            }
-            // These ksorts may present a performance problem
-            ksort($this->lines[$line], SORT_NUMERIC);
-        } else {
-            if (isset($this->lines[-1])) {
-                $struct = $this->lines[-1];
-            } else {
-                $struct = $this->lines[-1] = $new_struct;
-            }
-        }
-        ksort($this->lines, SORT_NUMERIC);
-
-        // Now, check if we need to operate on a lower structure
-        if (!empty($attr)) {
-            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
-            if (!$struct->value) {
-                $struct->value = array($attr, 'PUT VALUE HERE');
-            }
-        }
-        if (!empty($cssprop)) {
-            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
-            if (!$struct->value) {
-                // if we tokenize CSS this might be a little more difficult to do
-                $struct->value = array($cssprop, 'PUT VALUE HERE');
-            }
-        }
-
-        // Ok, structs are all setup, now time to register the error
-        $struct->addError($severity, $msg);
-    }
-
-    /**
-     * Retrieves raw error data for custom formatter to use
-     * @param List of arrays in format of array(line of error,
-     *        error severity, error message,
-     *        recursive sub-errors array)
-     */
-    public function getRaw() {
-        return $this->errors;
-    }
-
-    /**
-     * Default HTML formatting implementation for error messages
-     * @param $config Configuration array, vital for HTML output nature
-     * @param $errors Errors array to display; used for recursion.
-     */
-    public function getHTMLFormatted($config, $errors = null) {
-        $ret = array();
-
-        $this->generator = new HTMLPurifier_Generator($config, $this->context);
-        if ($errors === null) $errors = $this->errors;
-
-        // 'At line' message needs to be removed
-
-        // generation code for new structure goes here. It needs to be recursive.
-        foreach ($this->lines as $line => $col_array) {
-            if ($line == -1) continue;
-            foreach ($col_array as $col => $struct) {
-                $this->_renderStruct($ret, $struct, $line, $col);
-            }
-        }
-        if (isset($this->lines[-1])) {
-            $this->_renderStruct($ret, $this->lines[-1]);
-        }
-
-        if (empty($errors)) {
-            return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
-        } else {
-            return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
-        }
-
-    }
-
-    private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
-        $stack = array($struct);
-        $context_stack = array(array());
-        while ($current = array_pop($stack)) {
-            $context = array_pop($context_stack);
-            foreach ($current->errors as $error) {
-                list($severity, $msg) = $error;
-                $string = '';
-                $string .= '<div>';
-                // W3C uses an icon to indicate the severity of the error.
-                $error = $this->locale->getErrorName($severity);
-                $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
-                if (!is_null($line) && !is_null($col)) {
-                    $string .= "<em class=\"location\">Line $line, Column $col: </em> ";
-                } else {
-                    $string .= '<em class="location">End of Document: </em> ';
-                }
-                $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
-                $string .= '</div>';
-                // Here, have a marker for the character on the column appropriate.
-                // Be sure to clip extremely long lines.
-                //$string .= '<pre>';
-                //$string .= '';
-                //$string .= '</pre>';
-                $ret[] = $string;
-            }
-            foreach ($current->children as $type => $array) {
-                $context[] = $current;
-                $stack = array_merge($stack, array_reverse($array, true));
-                for ($i = count($array); $i > 0; $i--) {
-                    $context_stack[] = $context;
-                }
-            }
-        }
-    }
+	/**
+	 * Identifiers for the returned error array. These are purposely numeric
+	 * so list() can be used.
+	 */
+	const LINENO   = 0;
+	const SEVERITY = 1;
+	const MESSAGE  = 2;
+	const CHILDREN = 3;
+
+	protected $errors;
+	protected $_current;
+	protected $_stacks = array(array());
+	protected $locale;
+	protected $generator;
+	protected $context;
+
+	protected $lines = array();
+
+	public function __construct($context) { // $context must expose get(); it is kept for later lookups in send()
+		$this->locale    =& $context->get('Locale'); // bound by reference to whatever the context returns for 'Locale'
+		$this->context   = $context;
+		$this->_current  =& $this->_stacks[0]; // _current and errors both alias the first stack,
+		$this->errors    =& $this->_stacks[0]; // so entries appended via _current show up in errors
+	}
+
+	/**
+	 * Sends an error message to the collector for later use
+	 *
+	 * The error is recorded twice: as a flat array entry appended to the
+	 * current stack, and as an entry on an HTMLPurifier_ErrorStruct kept
+	 * in $this->lines, indexed by line and column (or under -1 when no
+	 * integer line/column is available).
+	 *
+	 * @param $severity int Error severity, PHP error style (don't use E_USER_)
+	 * @param $msg string Error message text
+	 * @param $subst1 string First substitution for $msg
+	 * @param $subst2 string ...
+	 * @note The substitutions are not declared parameters; any arguments
+	 *       after $msg are picked up through func_get_args().
+	 */
+	public function send($severity, $msg) {
+
+		$args = array();
+		if (func_num_args() > 2) {
+			$args = func_get_args();
+			array_shift($args); // drop $severity
+			unset($args[0]); // drop $msg; substitutions stay keyed from 1
+		}
+
+		$token = $this->context->get('CurrentToken', true); // presumably the second argument suppresses "missing key" errors; verify against the context class
+		$line  = $token ? $token->line : $this->context->get('CurrentLine', true);
+		$col   = $token ? $token->col  : $this->context->get('CurrentCol',  true);
+		$attr  = $this->context->get('CurrentAttr', true);
+
+		// perform special substitutions, also add custom parameters
+		$subst = array();
+		if (!is_null($token)) {
+			$args['CurrentToken'] = $token;
+		}
+		if (!is_null($attr)) {
+			$subst['$CurrentAttr.Name'] = $attr;
+			if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
+		}
+
+		if (empty($args)) {
+			$msg = $this->locale->getMessage($msg); // from here on $msg holds the localized text
+		} else {
+			$msg = $this->locale->formatMessage($msg, $args);
+		}
+
+		if (!empty($subst)) $msg = strtr($msg, $subst);
+
+		// (numerically indexed)
+		$error = array(
+			self::LINENO   => $line,
+			self::SEVERITY => $severity,
+			self::MESSAGE  => $msg,
+			self::CHILDREN => array()
+		);
+		$this->_current[] = $error;
+
+
+		// NEW CODE BELOW ...
+
+		$struct = null;
+		// Top-level errors are either:
+		//  TOKEN type, if $value is set appropriately, or
+		//  "syntax" type, if $value is null
+		$new_struct = new HTMLPurifier_ErrorStruct(); // only stored if no struct exists yet for this position
+		$new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
+		if ($token) $new_struct->value = clone $token;
+		if (is_int($line) && is_int($col)) {
+			if (isset($this->lines[$line][$col])) {
+				$struct = $this->lines[$line][$col];
+			} else {
+				$struct = $this->lines[$line][$col] = $new_struct;
+			}
+			// These ksorts may present a performance problem
+			ksort($this->lines[$line], SORT_NUMERIC);
+		} else {
+			if (isset($this->lines[-1])) { // -1 collects errors without an integer line/column
+				$struct = $this->lines[-1];
+			} else {
+				$struct = $this->lines[-1] = $new_struct;
+			}
+		}
+		ksort($this->lines, SORT_NUMERIC);
+
+		// Now, check if we need to operate on a lower structure
+		if (!empty($attr)) {
+			$struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
+			if (!$struct->value) {
+				$struct->value = array($attr, 'PUT VALUE HERE');
+			}
+		}
+		if (!empty($cssprop)) { // NOTE(review): $cssprop is never assigned in this method, so this branch never runs
+			$struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
+			if (!$struct->value) {
+				// if we tokenize CSS this might be a little more difficult to do
+				$struct->value = array($cssprop, 'PUT VALUE HERE');
+			}
+		}
+
+		// Ok, structs are all setup, now time to register the error
+		$struct->addError($severity, $msg);
+	}
+
+	/**
+	 * Retrieves raw error data for custom formatter to use
+	 * @return array List of arrays in format of array(line of error,
+	 *        error severity, error message,
+	 *        recursive sub-errors array)
+	 */
+	public function getRaw() {
+		return $this->errors;
+	}
+	/**
+	 * Default HTML formatting implementation for error messages
+	 * @param $config Configuration array, vital for HTML output nature
+	 * @param $errors Errors array to display; used for recursion.
+	 *        Defaults to the collector's own errors. In this
+	 *        implementation it only decides between the "no errors"
+	 *        paragraph and the list; the list itself is always built
+	 *        from $this->lines.
+	 * @return string HTML: a single paragraph when $errors is empty,
+	 *         otherwise an unordered list with one item per error.
+	 */
+	public function getHTMLFormatted($config, $errors = null) {
+		$ret = array(); // rendered HTML fragments, one per error
+
+		$this->generator = new HTMLPurifier_Generator($config, $this->context); // used by _renderStruct() for escaping
+		if ($errors === null) $errors = $this->errors;
+
+		// 'At line' message needs to be removed
+
+		// generation code for new structure goes here. It needs to be recursive.
+		foreach ($this->lines as $line => $col_array) {
+			if ($line == -1) continue; // position-less errors are rendered last, below
+			foreach ($col_array as $col => $struct) {
+				$this->_renderStruct($ret, $struct, $line, $col);
+			}
+		}
+		if (isset($this->lines[-1])) {
+			$this->_renderStruct($ret, $this->lines[-1]); // no line/col: labelled "End of Document"
+		}
+
+		if (empty($errors)) {
+			return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
+		} else {
+			return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
+		}
+
+	}
+
+	private function _renderStruct(&$ret, $struct, $line = null, $col = null) { // appends one HTML fragment per error of $struct and its descendants to $ret
+		$stack = array($struct); // explicit stack instead of recursion
+		$context_stack = array(array()); // kept in step with $stack: ancestors of each pending struct
+		while ($current = array_pop($stack)) {
+			$context = array_pop($context_stack); // NOTE(review): $context is tracked but never used in the output
+			foreach ($current->errors as $error) {
+				list($severity, $msg) = $error; // presumably the pair stored by addError(); verify against HTMLPurifier_ErrorStruct
+				$string = '';
+				$string .= '<div>';
+				// W3C uses an icon to indicate the severity of the error.
+				$error = $this->locale->getErrorName($severity); // $error is reused here for the severity's display name
+				$string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
+				if (!is_null($line) && !is_null($col)) {
+					$string .= "<em class=\"location\">Line $line, Column $col: </em> ";
+				} else {
+					$string .= '<em class="location">End of Document: </em> ';
+				}
+				$string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> '; // only the message goes through escape()
+				$string .= '</div>';
+				// Here, have a marker for the character on the column appropriate.
+				// Be sure to clip extremely long lines.
+				//$string .= '<pre>';
+				//$string .= '';
+				//$string .= '</pre>';
+				$ret[] = $string;
+			}
+			foreach ($current->children as $type => $array) { // $type is not used inside the loop
+				$context[] = $current;
+				$stack = array_merge($stack, array_reverse($array, true)); // reversed so the first child is popped first
+				for ($i = count($array); $i > 0; $i--) {
+					$context_stack[] = $context; // one context entry per child pushed above
+				}
+			}
+		}
+	}
 
 }
 

Please log in to merge, or discard this patch.

Braces +15 added lines, -5 removed lines patch added patch discarded remove patch

@@ -60,7 +60,9 @@  discard block
 block discarded – undo
         }
         if (!is_null($attr)) {
             $subst['$CurrentAttr.Name'] = $attr;
-            if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
+            if (isset($token->attr[$attr])) {
+            	$subst['$CurrentAttr.Value'] = $token->attr[$attr];
+            }
         }
 
         if (empty($args)) {
@@ -69,7 +71,9 @@  discard block
 block discarded – undo
             $msg = $this->locale->formatMessage($msg, $args);
         }
 
-        if (!empty($subst)) $msg = strtr($msg, $subst);
+        if (!empty($subst)) {
+        	$msg = strtr($msg, $subst);
+        }
 
         // (numerically indexed)
         $error = array(
@@ -89,7 +93,9 @@  discard block
 block discarded – undo
         //  "syntax" type, if $value is null
         $new_struct = new HTMLPurifier_ErrorStruct();
         $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
-        if ($token) $new_struct->value = clone $token;
+        if ($token) {
+        	$new_struct->value = clone $token;
+        }
         if (is_int($line) && is_int($col)) {
             if (isset($this->lines[$line][$col])) {
                 $struct = $this->lines[$line][$col];
@@ -145,13 +151,17 @@  discard block
 block discarded – undo
         $ret = array();
 
         $this->generator = new HTMLPurifier_Generator($config, $this->context);
-        if ($errors === null) $errors = $this->errors;
+        if ($errors === null) {
+        	$errors = $this->errors;
+        }
 
         // 'At line' message needs to be removed
 
         // generation code for new structure goes here. It needs to be recursive.
         foreach ($this->lines as $line => $col_array) {
-            if ($line == -1) continue;
+            if ($line == -1) {
+            	continue;
+            }
             foreach ($col_array as $col => $struct) {
                 $this->_renderStruct($ret, $struct, $line, $col);
             }

Please login to merge, or discard this patch.

Spacing +7 added lines, -7 removed lines patch added patch discarded remove patch

@@ -26,10 +26,10 @@  discard block
 block discarded – undo
     protected $lines = array();
 
     public function __construct($context) {
-        $this->locale    =& $context->get('Locale');
+        $this->locale    = & $context->get('Locale');
         $this->context   = $context;
-        $this->_current  =& $this->_stacks[0];
-        $this->errors    =& $this->_stacks[0];
+        $this->_current  = & $this->_stacks[0];
+        $this->errors    = & $this->_stacks[0];
     }
 
     /**
@@ -50,7 +50,7 @@  discard block
 block discarded – undo
 
         $token = $this->context->get('CurrentToken', true);
         $line  = $token ? $token->line : $this->context->get('CurrentLine', true);
-        $col   = $token ? $token->col  : $this->context->get('CurrentCol',  true);
+        $col   = $token ? $token->col : $this->context->get('CurrentCol', true);
         $attr  = $this->context->get('CurrentAttr', true);
 
         // perform special substitutions, also add custom parameters
@@ -161,9 +161,9 @@  discard block
 block discarded – undo
         }
 
         if (empty($errors)) {
-            return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
+            return '<p>'.$this->locale->getMessage('ErrorCollector: No errors').'</p>';
         } else {
-            return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
+            return '<ul><li>'.implode('</li><li>', $ret).'</li></ul>';
         }
 
     }
@@ -185,7 +185,7 @@  discard block
 block discarded – undo
                 } else {
                     $string .= '<em class="location">End of Document: </em> ';
                 }
-                $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
+                $string .= '<strong class="description">'.$this->generator->escape($msg).'</strong> ';
                 $string .= '</div>';
                 // Here, have a marker for the character on the column appropriate.
                 // Be sure to clip extremely long lines.

Please login to merge, or discard this patch.

classes/security/htmlpurifier/library/HTMLPurifier/Generator.php 4 patches

Doc Comments +4 added lines, -4 removed lines patch added patch discarded remove patch

@@ -70,7 +70,7 @@  discard block
 block discarded – undo
      * Generates HTML from an array of tokens.
      * @param $tokens Array of HTMLPurifier_Token
      * @param $config HTMLPurifier_Config object
-     * @return Generated HTML
+     * @return string HTML
      */
     public function generateFromTokens($tokens) {
         if (!$tokens) return '';
@@ -115,7 +115,7 @@  discard block
 block discarded – undo
     /**
      * Generates HTML from a single token.
      * @param $token HTMLPurifier_Token object.
-     * @return Generated HTML
+     * @return string HTML
      */
     public function generateFromToken($token) {
         if (!$token instanceof HTMLPurifier_Token) {
@@ -181,7 +181,7 @@  discard block
 block discarded – undo
      * @param $assoc_array_of_attributes Attribute array
      * @param $element Name of element attributes are for, used to check
      *        attribute minimization.
-     * @return Generate HTML fragment for insertion.
+     * @return string HTML fragment for insertion.
      */
     public function generateAttributes($assoc_array_of_attributes, $element = false) {
         $html = '';
@@ -238,7 +238,7 @@  discard block
 block discarded – undo
      *       for properly generating HTML here w/o using tokens, it stays
      *       public.
      * @param $string String data to escape for HTML.
-     * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
+     * @param integer $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
      *               permissible for non-attribute output.
      * @return String escaped data.
      */

Please login to merge, or discard this patch.

Indentation +238 added lines, -238 removed lines patch added patch discarded remove patch

@@ -10,244 +10,244 @@
 block discarded – undo
 class HTMLPurifier_Generator
 {
 
-    /**
-     * Whether or not generator should produce XML output
-     */
-    private $_xhtml = true;
-
-    /**
-     * :HACK: Whether or not generator should comment the insides of <script> tags
-     */
-    private $_scriptFix = false;
-
-    /**
-     * Cache of HTMLDefinition during HTML output to determine whether or
-     * not attributes should be minimized.
-     */
-    private $_def;
-
-    /**
-     * Cache of %Output.SortAttr
-     */
-    private $_sortAttr;
-
-    /**
-     * Cache of %Output.FlashCompat
-     */
-    private $_flashCompat;
-
-    /**
-     * Cache of %Output.FixInnerHTML
-     */
-    private $_innerHTMLFix;
-
-    /**
-     * Stack for keeping track of object information when outputting IE
-     * compatibility code.
-     */
-    private $_flashStack = array();
-
-    /**
-     * Configuration for the generator
-     */
-    protected $config;
-
-    /**
-     * @param $config Instance of HTMLPurifier_Config
-     * @param $context Instance of HTMLPurifier_Context
-     */
-    public function __construct($config, $context) {
-        $this->config = $config;
-        $this->_scriptFix = $config->get('Output.CommentScriptContents');
-        $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
-        $this->_sortAttr = $config->get('Output.SortAttr');
-        $this->_flashCompat = $config->get('Output.FlashCompat');
-        $this->_def = $config->getHTMLDefinition();
-        $this->_xhtml = $this->_def->doctype->xml;
-    }
-
-    /**
-     * Generates HTML from an array of tokens.
-     * @param $tokens Array of HTMLPurifier_Token
-     * @param $config HTMLPurifier_Config object
-     * @return Generated HTML
-     */
-    public function generateFromTokens($tokens) {
-        if (!$tokens) return '';
-
-        // Basic algorithm
-        $html = '';
-        for ($i = 0, $size = count($tokens); $i < $size; $i++) {
-            if ($this->_scriptFix && $tokens[$i]->name === 'script'
-                && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
-                // script special case
-                // the contents of the script block must be ONE token
-                // for this to work.
-                $html .= $this->generateFromToken($tokens[$i++]);
-                $html .= $this->generateScriptFromToken($tokens[$i++]);
-            }
-            $html .= $this->generateFromToken($tokens[$i]);
-        }
-
-        // Tidy cleanup
-        if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
-            $tidy = new Tidy;
-            $tidy->parseString($html, array(
-               'indent'=> true,
-               'output-xhtml' => $this->_xhtml,
-               'show-body-only' => true,
-               'indent-spaces' => 2,
-               'wrap' => 68,
-            ), 'utf8');
-            $tidy->cleanRepair();
-            $html = (string) $tidy; // explicit cast necessary
-        }
-
-        // Normalize newlines to system defined value
-        if ($this->config->get('Core.NormalizeNewlines')) {
-            $nl = $this->config->get('Output.Newline');
-            if ($nl === null) $nl = PHP_EOL;
-            if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
-        }
-        return $html;
-    }
-
-    /**
-     * Generates HTML from a single token.
-     * @param $token HTMLPurifier_Token object.
-     * @return Generated HTML
-     */
-    public function generateFromToken($token) {
-        if (!$token instanceof HTMLPurifier_Token) {
-            trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
-            return '';
-
-        } elseif ($token instanceof HTMLPurifier_Token_Start) {
-            $attr = $this->generateAttributes($token->attr, $token->name);
-            if ($this->_flashCompat) {
-                if ($token->name == "object") {
-                    $flash = new stdclass();
-                    $flash->attr = $token->attr;
-                    $flash->param = array();
-                    $this->_flashStack[] = $flash;
-                }
-            }
-            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
-
-        } elseif ($token instanceof HTMLPurifier_Token_End) {
-            $_extra = '';
-            if ($this->_flashCompat) {
-                if ($token->name == "object" && !empty($this->_flashStack)) {
-                    // doesn't do anything for now
-                }
-            }
-            return $_extra . '</' . $token->name . '>';
-
-        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
-            if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
-                $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
-            }
-            $attr = $this->generateAttributes($token->attr, $token->name);
-             return '<' . $token->name . ($attr ? ' ' : '') . $attr .
-                ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
-                . '>';
-
-        } elseif ($token instanceof HTMLPurifier_Token_Text) {
-            return $this->escape($token->data, ENT_NOQUOTES);
-
-        } elseif ($token instanceof HTMLPurifier_Token_Comment) {
-            return '<!--' . $token->data . '-->';
-        } else {
-            return '';
-
-        }
-    }
-
-    /**
-     * Special case processor for the contents of script tags
-     * @warning This runs into problems if there's already a literal
-     *          --> somewhere inside the script contents.
-     */
-    public function generateScriptFromToken($token) {
-        if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
-        // Thanks <http://lachy.id.au/log/2005/05/script-comments>
-        $data = preg_replace('#//\s*$#', '', $token->data);
-        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
-    }
-
-    /**
-     * Generates attribute declarations from attribute array.
-     * @note This does not include the leading or trailing space.
-     * @param $assoc_array_of_attributes Attribute array
-     * @param $element Name of element attributes are for, used to check
-     *        attribute minimization.
-     * @return Generate HTML fragment for insertion.
-     */
-    public function generateAttributes($assoc_array_of_attributes, $element = false) {
-        $html = '';
-        if ($this->_sortAttr) ksort($assoc_array_of_attributes);
-        foreach ($assoc_array_of_attributes as $key => $value) {
-            if (!$this->_xhtml) {
-                // Remove namespaced attributes
-                if (strpos($key, ':') !== false) continue;
-                // Check if we should minimize the attribute: val="val" -> val
-                if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
-                    $html .= $key . ' ';
-                    continue;
-                }
-            }
-            // Workaround for Internet Explorer innerHTML bug.
-            // Essentially, Internet Explorer, when calculating
-            // innerHTML, omits quotes if there are no instances of
-            // angled brackets, quotes or spaces.  However, when parsing
-            // HTML (for example, when you assign to innerHTML), it
-            // treats backticks as quotes.  Thus,
-            //      <img alt="``" />
-            // becomes
-            //      <img alt=`` />
-            // becomes
-            //      <img alt='' />
-            // Fortunately, all we need to do is trigger an appropriate
-            // quoting style, which we do by adding an extra space.
-            // This also is consistent with the W3C spec, which states
-            // that user agents may ignore leading or trailing
-            // whitespace (in fact, most don't, at least for attributes
-            // like alt, but an extra space at the end is barely
-            // noticeable).  Still, we have a configuration knob for
-            // this, since this transformation is not necesary if you
-            // don't process user input with innerHTML or you don't plan
-            // on supporting Internet Explorer.
-            if ($this->_innerHTMLFix) {
-                if (strpos($value, '`') !== false) {
-                    // check if correct quoting style would not already be
-                    // triggered
-                    if (strcspn($value, '"\' <>') === strlen($value)) {
-                        // protect!
-                        $value .= ' ';
-                    }
-                }
-            }
-            $html .= $key.'="'.$this->escape($value).'" ';
-        }
-        return rtrim($html);
-    }
-
-    /**
-     * Escapes raw text data.
-     * @todo This really ought to be protected, but until we have a facility
-     *       for properly generating HTML here w/o using tokens, it stays
-     *       public.
-     * @param $string String data to escape for HTML.
-     * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
-     *               permissible for non-attribute output.
-     * @return String escaped data.
-     */
-    public function escape($string, $quote = null) {
-        // Workaround for APC bug on Mac Leopard reported by sidepodcast
-        // http://htmlpurifier.org/phorum/read.php?3,4823,4846
-        if ($quote === null) $quote = ENT_COMPAT;
-        return htmlspecialchars($string, $quote, 'UTF-8', false);
-    }
+	/**
+	 * Whether or not generator should produce XML output
+	 */
+	private $_xhtml = true;
+
+	/**
+	 * :HACK: Whether or not generator should comment the insides of <script> tags
+	 */
+	private $_scriptFix = false;
+
+	/**
+	 * Cache of HTMLDefinition during HTML output to determine whether or
+	 * not attributes should be minimized.
+	 */
+	private $_def;
+
+	/**
+	 * Cache of %Output.SortAttr
+	 */
+	private $_sortAttr;
+
+	/**
+	 * Cache of %Output.FlashCompat
+	 */
+	private $_flashCompat;
+
+	/**
+	 * Cache of %Output.FixInnerHTML
+	 */
+	private $_innerHTMLFix;
+
+	/**
+	 * Stack for keeping track of object information when outputting IE
+	 * compatibility code.
+	 */
+	private $_flashStack = array();
+
+	/**
+	 * Configuration for the generator
+	 */
+	protected $config;
+
+	/**
+	 * @param $config Instance of HTMLPurifier_Config
+	 * @param $context Instance of HTMLPurifier_Context
+	 */
+	public function __construct($config, $context) {
+		$this->config = $config;
+		$this->_scriptFix = $config->get('Output.CommentScriptContents');
+		$this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
+		$this->_sortAttr = $config->get('Output.SortAttr');
+		$this->_flashCompat = $config->get('Output.FlashCompat');
+		$this->_def = $config->getHTMLDefinition();
+		$this->_xhtml = $this->_def->doctype->xml;
+	}
+
+	/**
+	 * Generates HTML from an array of tokens.
+	 * @param $tokens Array of HTMLPurifier_Token
+	 * @param $config HTMLPurifier_Config object
+	 * @return Generated HTML
+	 */
+	public function generateFromTokens($tokens) {
+		if (!$tokens) return '';
+
+		// Basic algorithm
+		$html = '';
+		for ($i = 0, $size = count($tokens); $i < $size; $i++) {
+			if ($this->_scriptFix && $tokens[$i]->name === 'script'
+				&& $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
+				// script special case
+				// the contents of the script block must be ONE token
+				// for this to work.
+				$html .= $this->generateFromToken($tokens[$i++]);
+				$html .= $this->generateScriptFromToken($tokens[$i++]);
+			}
+			$html .= $this->generateFromToken($tokens[$i]);
+		}
+
+		// Tidy cleanup
+		if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
+			$tidy = new Tidy;
+			$tidy->parseString($html, array(
+			   'indent'=> true,
+			   'output-xhtml' => $this->_xhtml,
+			   'show-body-only' => true,
+			   'indent-spaces' => 2,
+			   'wrap' => 68,
+			), 'utf8');
+			$tidy->cleanRepair();
+			$html = (string) $tidy; // explicit cast necessary
+		}
+
+		// Normalize newlines to system defined value
+		if ($this->config->get('Core.NormalizeNewlines')) {
+			$nl = $this->config->get('Output.Newline');
+			if ($nl === null) $nl = PHP_EOL;
+			if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
+		}
+		return $html;
+	}
+
+	/**
+	 * Generates HTML from a single token.
+	 * @param $token HTMLPurifier_Token object.
+	 * @return Generated HTML
+	 */
+	public function generateFromToken($token) {
+		if (!$token instanceof HTMLPurifier_Token) {
+			trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
+			return '';
+
+		} elseif ($token instanceof HTMLPurifier_Token_Start) {
+			$attr = $this->generateAttributes($token->attr, $token->name);
+			if ($this->_flashCompat) {
+				if ($token->name == "object") {
+					$flash = new stdclass();
+					$flash->attr = $token->attr;
+					$flash->param = array();
+					$this->_flashStack[] = $flash;
+				}
+			}
+			return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
+
+		} elseif ($token instanceof HTMLPurifier_Token_End) {
+			$_extra = '';
+			if ($this->_flashCompat) {
+				if ($token->name == "object" && !empty($this->_flashStack)) {
+					// doesn't do anything for now
+				}
+			}
+			return $_extra . '</' . $token->name . '>';
+
+		} elseif ($token instanceof HTMLPurifier_Token_Empty) {
+			if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
+				$this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
+			}
+			$attr = $this->generateAttributes($token->attr, $token->name);
+			 return '<' . $token->name . ($attr ? ' ' : '') . $attr .
+				( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
+				. '>';
+
+		} elseif ($token instanceof HTMLPurifier_Token_Text) {
+			return $this->escape($token->data, ENT_NOQUOTES);
+
+		} elseif ($token instanceof HTMLPurifier_Token_Comment) {
+			return '<!--' . $token->data . '-->';
+		} else {
+			return '';
+
+		}
+	}
+
+	/**
+	 * Special case processor for the contents of script tags
+	 * @warning This runs into problems if there's already a literal
+	 *          --> somewhere inside the script contents.
+	 */
+	public function generateScriptFromToken($token) {
+		if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
+		// Thanks <http://lachy.id.au/log/2005/05/script-comments>
+		$data = preg_replace('#//\s*$#', '', $token->data);
+		return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
+	}
+
+	/**
+	 * Generates attribute declarations from attribute array.
+	 * @note This does not include the leading or trailing space.
+	 * @param $assoc_array_of_attributes Attribute array
+	 * @param $element Name of element attributes are for, used to check
+	 *        attribute minimization.
+	 * @return Generate HTML fragment for insertion.
+	 */
+	public function generateAttributes($assoc_array_of_attributes, $element = false) {
+		$html = '';
+		if ($this->_sortAttr) ksort($assoc_array_of_attributes);
+		foreach ($assoc_array_of_attributes as $key => $value) {
+			if (!$this->_xhtml) {
+				// Remove namespaced attributes
+				if (strpos($key, ':') !== false) continue;
+				// Check if we should minimize the attribute: val="val" -> val
+				if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
+					$html .= $key . ' ';
+					continue;
+				}
+			}
+			// Workaround for Internet Explorer innerHTML bug.
+			// Essentially, Internet Explorer, when calculating
+			// innerHTML, omits quotes if there are no instances of
+			// angled brackets, quotes or spaces.  However, when parsing
+			// HTML (for example, when you assign to innerHTML), it
+			// treats backticks as quotes.  Thus,
+			//      <img alt="``" />
+			// becomes
+			//      <img alt=`` />
+			// becomes
+			//      <img alt='' />
+			// Fortunately, all we need to do is trigger an appropriate
+			// quoting style, which we do by adding an extra space.
+			// This also is consistent with the W3C spec, which states
+			// that user agents may ignore leading or trailing
+			// whitespace (in fact, most don't, at least for attributes
+			// like alt, but an extra space at the end is barely
+			// noticeable).  Still, we have a configuration knob for
+			// this, since this transformation is not necesary if you
+			// don't process user input with innerHTML or you don't plan
+			// on supporting Internet Explorer.
+			if ($this->_innerHTMLFix) {
+				if (strpos($value, '`') !== false) {
+					// check if correct quoting style would not already be
+					// triggered
+					if (strcspn($value, '"\' <>') === strlen($value)) {
+						// protect!
+						$value .= ' ';
+					}
+				}
+			}
+			$html .= $key.'="'.$this->escape($value).'" ';
+		}
+		return rtrim($html);
+	}
+
+	/**
+	 * Escapes raw text data.
+	 * @todo This really ought to be protected, but until we have a facility
+	 *       for properly generating HTML here w/o using tokens, it stays
+	 *       public.
+	 * @param $string String data to escape for HTML.
+	 * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
+	 *               permissible for non-attribute output.
+	 * @return String escaped data.
+	 */
+	public function escape($string, $quote = null) {
+		// Workaround for APC bug on Mac Leopard reported by sidepodcast
+		// http://htmlpurifier.org/phorum/read.php?3,4823,4846
+		if ($quote === null) $quote = ENT_COMPAT;
+		return htmlspecialchars($string, $quote, 'UTF-8', false);
+	}
 
 }
 

Please login to merge, or discard this patch.

Braces +21 added lines, -7 removed lines patch added patch discarded remove patch

@@ -73,7 +73,9 @@  discard block
 block discarded – undo
      * @return Generated HTML
      */
     public function generateFromTokens($tokens) {
-        if (!$tokens) return '';
+        if (!$tokens) {
+        	return '';
+        }
 
         // Basic algorithm
         $html = '';
@@ -106,8 +108,12 @@  discard block
 block discarded – undo
         // Normalize newlines to system defined value
         if ($this->config->get('Core.NormalizeNewlines')) {
             $nl = $this->config->get('Output.Newline');
-            if ($nl === null) $nl = PHP_EOL;
-            if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
+            if ($nl === null) {
+            	$nl = PHP_EOL;
+            }
+            if ($nl !== "\n") {
+            	$html = str_replace("\n", $nl, $html);
+            }
         }
         return $html;
     }
@@ -169,7 +175,9 @@  discard block
 block discarded – undo
      *          --> somewhere inside the script contents.
      */
     public function generateScriptFromToken($token) {
-        if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
+        if (!$token instanceof HTMLPurifier_Token_Text) {
+        	return $this->generateFromToken($token);
+        }
         // Thanks <http://lachy.id.au/log/2005/05/script-comments>
         $data = preg_replace('#//\s*$#', '', $token->data);
         return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
@@ -185,11 +193,15 @@  discard block
 block discarded – undo
      */
     public function generateAttributes($assoc_array_of_attributes, $element = false) {
         $html = '';
-        if ($this->_sortAttr) ksort($assoc_array_of_attributes);
+        if ($this->_sortAttr) {
+        	ksort($assoc_array_of_attributes);
+        }
         foreach ($assoc_array_of_attributes as $key => $value) {
             if (!$this->_xhtml) {
                 // Remove namespaced attributes
-                if (strpos($key, ':') !== false) continue;
+                if (strpos($key, ':') !== false) {
+                	continue;
+                }
                 // Check if we should minimize the attribute: val="val" -> val
                 if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
                     $html .= $key . ' ';
@@ -245,7 +257,9 @@  discard block
 block discarded – undo
     public function escape($string, $quote = null) {
         // Workaround for APC bug on Mac Leopard reported by sidepodcast
         // http://htmlpurifier.org/phorum/read.php?3,4823,4846
-        if ($quote === null) $quote = ENT_COMPAT;
+        if ($quote === null) {
+        	$quote = ENT_COMPAT;
+        }
         return htmlspecialchars($string, $quote, 'UTF-8', false);
     }
 

Please login to merge, or discard this patch.

Spacing +9 added lines, -9 removed lines patch added patch discarded remove patch

@@ -79,7 +79,7 @@  discard block
 block discarded – undo
         $html = '';
         for ($i = 0, $size = count($tokens); $i < $size; $i++) {
             if ($this->_scriptFix && $tokens[$i]->name === 'script'
-                && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
+                && $i + 2 < $size && $tokens[$i + 2] instanceof HTMLPurifier_Token_End) {
                 // script special case
                 // the contents of the script block must be ONE token
                 // for this to work.
@@ -132,7 +132,7 @@  discard block
 block discarded – undo
                     $this->_flashStack[] = $flash;
                 }
             }
-            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
+            return '<'.$token->name.($attr ? ' ' : '').$attr.'>';
 
         } elseif ($token instanceof HTMLPurifier_Token_End) {
             $_extra = '';
@@ -141,22 +141,22 @@  discard block
 block discarded – undo
                     // doesn't do anything for now
                 }
             }
-            return $_extra . '</' . $token->name . '>';
+            return $_extra.'</'.$token->name.'>';
 
         } elseif ($token instanceof HTMLPurifier_Token_Empty) {
             if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
-                $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
+                $this->_flashStack[count($this->_flashStack) - 1]->param[$token->attr['name']] = $token->attr['value'];
             }
             $attr = $this->generateAttributes($token->attr, $token->name);
-             return '<' . $token->name . ($attr ? ' ' : '') . $attr .
-                ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
+             return '<'.$token->name.($attr ? ' ' : '').$attr.
+                ($this->_xhtml ? ' /' : '') // <br /> v. <br>
                 . '>';
 
         } elseif ($token instanceof HTMLPurifier_Token_Text) {
             return $this->escape($token->data, ENT_NOQUOTES);
 
         } elseif ($token instanceof HTMLPurifier_Token_Comment) {
-            return '<!--' . $token->data . '-->';
+            return '<!--'.$token->data.'-->';
         } else {
             return '';
 
@@ -172,7 +172,7 @@  discard block
 block discarded – undo
         if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
         // Thanks <http://lachy.id.au/log/2005/05/script-comments>
         $data = preg_replace('#//\s*$#', '', $token->data);
-        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
+        return '<!--//--><![CDATA[//><!--'."\n".trim($data)."\n".'//--><!]]>';
     }
 
     /**
@@ -192,7 +192,7 @@  discard block
 block discarded – undo
                 if (strpos($key, ':') !== false) continue;
                 // Check if we should minimize the attribute: val="val" -> val
                 if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
-                    $html .= $key . ' ';
+                    $html .= $key.' ';
                     continue;
                 }
             }

Please login to merge, or discard this patch.

classes/security/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php 4 patches

Doc Comments +1 added lines, -2 removed lines patch added patch discarded remove patch

@@ -385,8 +385,7 @@
 block discarded – undo
      * separate lists for processing. Format is element[attr1|attr2],element2...
      * @warning Although it's largely drawn from TinyMCE's implementation,
      *      it is different, and you'll probably have to modify your lists
-     * @param $list String list to parse
-     * @param array($allowed_elements, $allowed_attributes)
+     * @param string $list String list to parse
      * @todo Give this its own class, probably static interface
      */
     public function parseTinyMCEAllowedList($list) {

Please login to merge, or discard this patch.

Indentation +392 added lines, -392 removed lines patch added patch discarded remove patch

@@ -26,398 +26,398 @@
 block discarded – undo
 class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
 {
 
-    // FULLY-PUBLIC VARIABLES ---------------------------------------------
-
-    /**
-     * Associative array of element names to HTMLPurifier_ElementDef
-     */
-    public $info = array();
-
-    /**
-     * Associative array of global attribute name to attribute definition.
-     */
-    public $info_global_attr = array();
-
-    /**
-     * String name of parent element HTML will be going into.
-     */
-    public $info_parent = 'div';
-
-    /**
-     * Definition for parent element, allows parent element to be a
-     * tag that's not allowed inside the HTML fragment.
-     */
-    public $info_parent_def;
-
-    /**
-     * String name of element used to wrap inline elements in block context
-     * @note This is rarely used except for BLOCKQUOTEs in strict mode
-     */
-    public $info_block_wrapper = 'p';
-
-    /**
-     * Associative array of deprecated tag name to HTMLPurifier_TagTransform
-     */
-    public $info_tag_transform = array();
-
-    /**
-     * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
-     */
-    public $info_attr_transform_pre = array();
-
-    /**
-     * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
-     */
-    public $info_attr_transform_post = array();
-
-    /**
-     * Nested lookup array of content set name (Block, Inline) to
-     * element name to whether or not it belongs in that content set.
-     */
-    public $info_content_sets = array();
-
-    /**
-     * Indexed list of HTMLPurifier_Injector to be used.
-     */
-    public $info_injector = array();
-
-    /**
-     * Doctype object
-     */
-    public $doctype;
-
-
-
-    // RAW CUSTOMIZATION STUFF --------------------------------------------
-
-    /**
-     * Adds a custom attribute to a pre-existing element
-     * @note This is strictly convenience, and does not have a corresponding
-     *       method in HTMLPurifier_HTMLModule
-     * @param $element_name String element name to add attribute to
-     * @param $attr_name String name of attribute
-     * @param $def Attribute definition, can be string or object, see
-     *             HTMLPurifier_AttrTypes for details
-     */
-    public function addAttribute($element_name, $attr_name, $def) {
-        $module = $this->getAnonymousModule();
-        if (!isset($module->info[$element_name])) {
-            $element = $module->addBlankElement($element_name);
-        } else {
-            $element = $module->info[$element_name];
-        }
-        $element->attr[$attr_name] = $def;
-    }
-
-    /**
-     * Adds a custom element to your HTML definition
-     * @note See HTMLPurifier_HTMLModule::addElement for detailed
-     *       parameter and return value descriptions.
-     */
-    public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) {
-        $module = $this->getAnonymousModule();
-        // assume that if the user is calling this, the element
-        // is safe. This may not be a good idea
-        $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
-        return $element;
-    }
-
-    /**
-     * Adds a blank element to your HTML definition, for overriding
-     * existing behavior
-     * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
-     *       parameter and return value descriptions.
-     */
-    public function addBlankElement($element_name) {
-        $module  = $this->getAnonymousModule();
-        $element = $module->addBlankElement($element_name);
-        return $element;
-    }
-
-    /**
-     * Retrieves a reference to the anonymous module, so you can
-     * bust out advanced features without having to make your own
-     * module.
-     */
-    public function getAnonymousModule() {
-        if (!$this->_anonModule) {
-            $this->_anonModule = new HTMLPurifier_HTMLModule();
-            $this->_anonModule->name = 'Anonymous';
-        }
-        return $this->_anonModule;
-    }
-
-    private $_anonModule = null;
-
-
-    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
-
-    public $type = 'HTML';
-    public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
-
-    /**
-     * Performs low-cost, preliminary initialization.
-     */
-    public function __construct() {
-        $this->manager = new HTMLPurifier_HTMLModuleManager();
-    }
-
-    protected function doSetup($config) {
-        $this->processModules($config);
-        $this->setupConfigStuff($config);
-        unset($this->manager);
-
-        // cleanup some of the element definitions
-        foreach ($this->info as $k => $v) {
-            unset($this->info[$k]->content_model);
-            unset($this->info[$k]->content_model_type);
-        }
-    }
-
-    /**
-     * Extract out the information from the manager
-     */
-    protected function processModules($config) {
-
-        if ($this->_anonModule) {
-            // for user specific changes
-            // this is late-loaded so we don't have to deal with PHP4
-            // reference wonky-ness
-            $this->manager->addModule($this->_anonModule);
-            unset($this->_anonModule);
-        }
-
-        $this->manager->setup($config);
-        $this->doctype = $this->manager->doctype;
-
-        foreach ($this->manager->modules as $module) {
-            foreach($module->info_tag_transform as $k => $v) {
-                if ($v === false) unset($this->info_tag_transform[$k]);
-                else $this->info_tag_transform[$k] = $v;
-            }
-            foreach($module->info_attr_transform_pre as $k => $v) {
-                if ($v === false) unset($this->info_attr_transform_pre[$k]);
-                else $this->info_attr_transform_pre[$k] = $v;
-            }
-            foreach($module->info_attr_transform_post as $k => $v) {
-                if ($v === false) unset($this->info_attr_transform_post[$k]);
-                else $this->info_attr_transform_post[$k] = $v;
-            }
-            foreach ($module->info_injector as $k => $v) {
-                if ($v === false) unset($this->info_injector[$k]);
-                else $this->info_injector[$k] = $v;
-            }
-        }
-
-        $this->info = $this->manager->getElements();
-        $this->info_content_sets = $this->manager->contentSets->lookup;
-
-    }
-
-    /**
-     * Sets up stuff based on config. We need a better way of doing this.
-     */
-    protected function setupConfigStuff($config) {
-
-        $block_wrapper = $config->get('HTML.BlockWrapper');
-        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
-            $this->info_block_wrapper = $block_wrapper;
-        } else {
-            trigger_error('Cannot use non-block element as block wrapper',
-                E_USER_ERROR);
-        }
-
-        $parent = $config->get('HTML.Parent');
-        $def = $this->manager->getElement($parent, true);
-        if ($def) {
-            $this->info_parent = $parent;
-            $this->info_parent_def = $def;
-        } else {
-            trigger_error('Cannot use unrecognized element as parent',
-                E_USER_ERROR);
-            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
-        }
-
-        // support template text
-        $support = "(for information on implementing this, see the ".
-                   "support forums) ";
-
-        // setup allowed elements -----------------------------------------
-
-        $allowed_elements = $config->get('HTML.AllowedElements');
-        $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early
-
-        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
-            $allowed = $config->get('HTML.Allowed');
-            if (is_string($allowed)) {
-                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
-            }
-        }
-
-        if (is_array($allowed_elements)) {
-            foreach ($this->info as $name => $d) {
-                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
-                unset($allowed_elements[$name]);
-            }
-            // emit errors
-            foreach ($allowed_elements as $element => $d) {
-                $element = htmlspecialchars($element, ENT_COMPAT | ENT_HTML401, 'UTF-8', false); // PHP doesn't escape errors, be careful!
-                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
-            }
-        }
-
-        // setup allowed attributes ---------------------------------------
-
-        $allowed_attributes_mutable = $allowed_attributes; // by copy!
-        if (is_array($allowed_attributes)) {
-
-            // This actually doesn't do anything, since we went away from
-            // global attributes. It's possible that userland code uses
-            // it, but HTMLModuleManager doesn't!
-            foreach ($this->info_global_attr as $attr => $x) {
-                $keys = array($attr, "*@$attr", "*.$attr");
-                $delete = true;
-                foreach ($keys as $key) {
-                    if ($delete && isset($allowed_attributes[$key])) {
-                        $delete = false;
-                    }
-                    if (isset($allowed_attributes_mutable[$key])) {
-                        unset($allowed_attributes_mutable[$key]);
-                    }
-                }
-                if ($delete) unset($this->info_global_attr[$attr]);
-            }
-
-            foreach ($this->info as $tag => $info) {
-                foreach ($info->attr as $attr => $x) {
-                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
-                    $delete = true;
-                    foreach ($keys as $key) {
-                        if ($delete && isset($allowed_attributes[$key])) {
-                            $delete = false;
-                        }
-                        if (isset($allowed_attributes_mutable[$key])) {
-                            unset($allowed_attributes_mutable[$key]);
-                        }
-                    }
-                    if ($delete) {
-                        if ($this->info[$tag]->attr[$attr]->required) {
-                            trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
-                        }
-                        unset($this->info[$tag]->attr[$attr]);
-                    }
-                }
-            }
-            // emit errors
-            foreach ($allowed_attributes_mutable as $elattr => $d) {
-                $bits = preg_split('/[.@]/', $elattr, 2);
-                $c = count($bits);
-                switch ($c) {
-                    case 2:
-                        if ($bits[0] !== '*') {
-                            $element = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
-                            $attribute = htmlspecialchars($bits[1], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
-                            if (!isset($this->info[$element])) {
-                                trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support");
-                            } else {
-                                trigger_error("Attribute '$attribute' in element '$element' not supported $support",
-                                    E_USER_WARNING);
-                            }
-                            break;
-                        }
-                        // otherwise fall through
-                    case 1:
-                        $attribute = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
-                        trigger_error("Global attribute '$attribute' is not ".
-                            "supported in any elements $support",
-                            E_USER_WARNING);
-                        break;
-                }
-            }
-
-        }
-
-        // setup forbidden elements ---------------------------------------
-
-        $forbidden_elements   = $config->get('HTML.ForbiddenElements');
-        $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');
-
-        foreach ($this->info as $tag => $info) {
-            if (isset($forbidden_elements[$tag])) {
-                unset($this->info[$tag]);
-                continue;
-            }
-            foreach ($info->attr as $attr => $x) {
-                if (
-                    isset($forbidden_attributes["$tag@$attr"]) ||
-                    isset($forbidden_attributes["*@$attr"]) ||
-                    isset($forbidden_attributes[$attr])
-                ) {
-                    unset($this->info[$tag]->attr[$attr]);
-                    continue;
-                } // this segment might get removed eventually
-                elseif (isset($forbidden_attributes["$tag.$attr"])) {
-                    // $tag.$attr are not user supplied, so no worries!
-                    trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
-                }
-            }
-        }
-        foreach ($forbidden_attributes as $key => $v) {
-            if (strlen($key) < 2) continue;
-            if ($key[0] != '*') continue;
-            if ($key[1] == '.') {
-                trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
-            }
-        }
-
-        // setup injectors -----------------------------------------------------
-        foreach ($this->info_injector as $i => $injector) {
-            if ($injector->checkNeeded($config) !== false) {
-                // remove injector that does not have it's required
-                // elements/attributes present, and is thus not needed.
-                unset($this->info_injector[$i]);
-            }
-        }
-    }
-
-    /**
-     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
-     * separate lists for processing. Format is element[attr1|attr2],element2...
-     * @warning Although it's largely drawn from TinyMCE's implementation,
-     *      it is different, and you'll probably have to modify your lists
-     * @param $list String list to parse
-     * @param array($allowed_elements, $allowed_attributes)
-     * @todo Give this its own class, probably static interface
-     */
-    public function parseTinyMCEAllowedList($list) {
-
-        $list = str_replace(array(' ', "\t"), '', $list);
-
-        $elements = array();
-        $attributes = array();
-
-        $chunks = preg_split('/(,|[\n\r]+)/', $list);
-        foreach ($chunks as $chunk) {
-            if (empty($chunk)) continue;
-            // remove TinyMCE element control characters
-            if (!strpos($chunk, '[')) {
-                $element = $chunk;
-                $attr = false;
-            } else {
-                list($element, $attr) = explode('[', $chunk);
-            }
-            if ($element !== '*') $elements[$element] = true;
-            if (!$attr) continue;
-            $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
-            $attr = explode('|', $attr);
-            foreach ($attr as $key) {
-                $attributes["$element.$key"] = true;
-            }
-        }
-
-        return array($elements, $attributes);
-
-    }
+	// FULLY-PUBLIC VARIABLES ---------------------------------------------
+
+	/**
+	 * Associative array of element names to HTMLPurifier_ElementDef
+	 */
+	public $info = array();
+
+	/**
+	 * Associative array of global attribute name to attribute definition.
+	 */
+	public $info_global_attr = array();
+
+	/**
+	 * String name of parent element HTML will be going into.
+	 */
+	public $info_parent = 'div';
+
+	/**
+	 * Definition for parent element, allows parent element to be a
+	 * tag that's not allowed inside the HTML fragment.
+	 */
+	public $info_parent_def;
+
+	/**
+	 * String name of element used to wrap inline elements in block context
+	 * @note This is rarely used except for BLOCKQUOTEs in strict mode
+	 */
+	public $info_block_wrapper = 'p';
+
+	/**
+	 * Associative array of deprecated tag name to HTMLPurifier_TagTransform
+	 */
+	public $info_tag_transform = array();
+
+	/**
+	 * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
+	 */
+	public $info_attr_transform_pre = array();
+
+	/**
+	 * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
+	 */
+	public $info_attr_transform_post = array();
+
+	/**
+	 * Nested lookup array of content set name (Block, Inline) to
+	 * element name to whether or not it belongs in that content set.
+	 */
+	public $info_content_sets = array();
+
+	/**
+	 * Indexed list of HTMLPurifier_Injector to be used.
+	 */
+	public $info_injector = array();
+
+	/**
+	 * Doctype object
+	 */
+	public $doctype;
+
+
+
+	// RAW CUSTOMIZATION STUFF --------------------------------------------
+
+	/**
+	 * Adds a custom attribute to a pre-existing element
+	 * @note This is strictly convenience, and does not have a corresponding
+	 *       method in HTMLPurifier_HTMLModule
+	 * @param $element_name String element name to add attribute to
+	 * @param $attr_name String name of attribute
+	 * @param $def Attribute definition, can be string or object, see
+	 *             HTMLPurifier_AttrTypes for details
+	 */
+	public function addAttribute($element_name, $attr_name, $def) {
+		$module = $this->getAnonymousModule();
+		if (!isset($module->info[$element_name])) {
+			$element = $module->addBlankElement($element_name);
+		} else {
+			$element = $module->info[$element_name];
+		}
+		$element->attr[$attr_name] = $def;
+	}
+
+	/**
+	 * Adds a custom element to your HTML definition
+	 * @note See HTMLPurifier_HTMLModule::addElement for detailed
+	 *       parameter and return value descriptions.
+	 */
+	public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) {
+		$module = $this->getAnonymousModule();
+		// assume that if the user is calling this, the element
+		// is safe. This may not be a good idea
+		$element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
+		return $element;
+	}
+
+	/**
+	 * Adds a blank element to your HTML definition, for overriding
+	 * existing behavior
+	 * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
+	 *       parameter and return value descriptions.
+	 */
+	public function addBlankElement($element_name) {
+		$module  = $this->getAnonymousModule();
+		$element = $module->addBlankElement($element_name);
+		return $element;
+	}
+
+	/**
+	 * Retrieves a reference to the anonymous module, so you can
+	 * bust out advanced features without having to make your own
+	 * module.
+	 */
+	public function getAnonymousModule() {
+		if (!$this->_anonModule) {
+			$this->_anonModule = new HTMLPurifier_HTMLModule();
+			$this->_anonModule->name = 'Anonymous';
+		}
+		return $this->_anonModule;
+	}
+
+	private $_anonModule = null;
+
+
+	// PUBLIC BUT INTERNAL VARIABLES --------------------------------------
+
+	public $type = 'HTML';
+	public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
+
+	/**
+	 * Performs low-cost, preliminary initialization.
+	 */
+	public function __construct() {
+		$this->manager = new HTMLPurifier_HTMLModuleManager();
+	}
+
+	protected function doSetup($config) {
+		$this->processModules($config);
+		$this->setupConfigStuff($config);
+		unset($this->manager);
+
+		// cleanup some of the element definitions
+		foreach ($this->info as $k => $v) {
+			unset($this->info[$k]->content_model);
+			unset($this->info[$k]->content_model_type);
+		}
+	}
+
+	/**
+	 * Extract out the information from the manager
+	 */
+	protected function processModules($config) {
+
+		if ($this->_anonModule) {
+			// for user specific changes
+			// this is late-loaded so we don't have to deal with PHP4
+			// reference wonky-ness
+			$this->manager->addModule($this->_anonModule);
+			unset($this->_anonModule);
+		}
+
+		$this->manager->setup($config);
+		$this->doctype = $this->manager->doctype;
+
+		foreach ($this->manager->modules as $module) {
+			foreach($module->info_tag_transform as $k => $v) {
+				if ($v === false) unset($this->info_tag_transform[$k]);
+				else $this->info_tag_transform[$k] = $v;
+			}
+			foreach($module->info_attr_transform_pre as $k => $v) {
+				if ($v === false) unset($this->info_attr_transform_pre[$k]);
+				else $this->info_attr_transform_pre[$k] = $v;
+			}
+			foreach($module->info_attr_transform_post as $k => $v) {
+				if ($v === false) unset($this->info_attr_transform_post[$k]);
+				else $this->info_attr_transform_post[$k] = $v;
+			}
+			foreach ($module->info_injector as $k => $v) {
+				if ($v === false) unset($this->info_injector[$k]);
+				else $this->info_injector[$k] = $v;
+			}
+		}
+
+		$this->info = $this->manager->getElements();
+		$this->info_content_sets = $this->manager->contentSets->lookup;
+
+	}
+
+	/**
+	 * Sets up stuff based on config. We need a better way of doing this.
+	 */
+	protected function setupConfigStuff($config) {
+
+		$block_wrapper = $config->get('HTML.BlockWrapper');
+		if (isset($this->info_content_sets['Block'][$block_wrapper])) {
+			$this->info_block_wrapper = $block_wrapper;
+		} else {
+			trigger_error('Cannot use non-block element as block wrapper',
+				E_USER_ERROR);
+		}
+
+		$parent = $config->get('HTML.Parent');
+		$def = $this->manager->getElement($parent, true);
+		if ($def) {
+			$this->info_parent = $parent;
+			$this->info_parent_def = $def;
+		} else {
+			trigger_error('Cannot use unrecognized element as parent',
+				E_USER_ERROR);
+			$this->info_parent_def = $this->manager->getElement($this->info_parent, true);
+		}
+
+		// support template text
+		$support = "(for information on implementing this, see the ".
+				   "support forums) ";
+
+		// setup allowed elements -----------------------------------------
+
+		$allowed_elements = $config->get('HTML.AllowedElements');
+		$allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early
+
+		if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
+			$allowed = $config->get('HTML.Allowed');
+			if (is_string($allowed)) {
+				list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
+			}
+		}
+
+		if (is_array($allowed_elements)) {
+			foreach ($this->info as $name => $d) {
+				if(!isset($allowed_elements[$name])) unset($this->info[$name]);
+				unset($allowed_elements[$name]);
+			}
+			// emit errors
+			foreach ($allowed_elements as $element => $d) {
+				$element = htmlspecialchars($element, ENT_COMPAT | ENT_HTML401, 'UTF-8', false); // PHP doesn't escape errors, be careful!
+				trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
+			}
+		}
+
+		// setup allowed attributes ---------------------------------------
+
+		$allowed_attributes_mutable = $allowed_attributes; // by copy!
+		if (is_array($allowed_attributes)) {
+
+			// This actually doesn't do anything, since we went away from
+			// global attributes. It's possible that userland code uses
+			// it, but HTMLModuleManager doesn't!
+			foreach ($this->info_global_attr as $attr => $x) {
+				$keys = array($attr, "*@$attr", "*.$attr");
+				$delete = true;
+				foreach ($keys as $key) {
+					if ($delete && isset($allowed_attributes[$key])) {
+						$delete = false;
+					}
+					if (isset($allowed_attributes_mutable[$key])) {
+						unset($allowed_attributes_mutable[$key]);
+					}
+				}
+				if ($delete) unset($this->info_global_attr[$attr]);
+			}
+
+			foreach ($this->info as $tag => $info) {
+				foreach ($info->attr as $attr => $x) {
+					$keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
+					$delete = true;
+					foreach ($keys as $key) {
+						if ($delete && isset($allowed_attributes[$key])) {
+							$delete = false;
+						}
+						if (isset($allowed_attributes_mutable[$key])) {
+							unset($allowed_attributes_mutable[$key]);
+						}
+					}
+					if ($delete) {
+						if ($this->info[$tag]->attr[$attr]->required) {
+							trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
+						}
+						unset($this->info[$tag]->attr[$attr]);
+					}
+				}
+			}
+			// emit errors
+			foreach ($allowed_attributes_mutable as $elattr => $d) {
+				$bits = preg_split('/[.@]/', $elattr, 2);
+				$c = count($bits);
+				switch ($c) {
+					case 2:
+						if ($bits[0] !== '*') {
+							$element = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
+							$attribute = htmlspecialchars($bits[1], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
+							if (!isset($this->info[$element])) {
+								trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support");
+							} else {
+								trigger_error("Attribute '$attribute' in element '$element' not supported $support",
+									E_USER_WARNING);
+							}
+							break;
+						}
+						// otherwise fall through
+					case 1:
+						$attribute = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
+						trigger_error("Global attribute '$attribute' is not ".
+							"supported in any elements $support",
+							E_USER_WARNING);
+						break;
+				}
+			}
+
+		}
+
+		// setup forbidden elements ---------------------------------------
+
+		$forbidden_elements   = $config->get('HTML.ForbiddenElements');
+		$forbidden_attributes = $config->get('HTML.ForbiddenAttributes');
+
+		foreach ($this->info as $tag => $info) {
+			if (isset($forbidden_elements[$tag])) {
+				unset($this->info[$tag]);
+				continue;
+			}
+			foreach ($info->attr as $attr => $x) {
+				if (
+					isset($forbidden_attributes["$tag@$attr"]) ||
+					isset($forbidden_attributes["*@$attr"]) ||
+					isset($forbidden_attributes[$attr])
+				) {
+					unset($this->info[$tag]->attr[$attr]);
+					continue;
+				} // this segment might get removed eventually
+				elseif (isset($forbidden_attributes["$tag.$attr"])) {
+					// $tag.$attr are not user supplied, so no worries!
+					trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
+				}
+			}
+		}
+		foreach ($forbidden_attributes as $key => $v) {
+			if (strlen($key) < 2) continue;
+			if ($key[0] != '*') continue;
+			if ($key[1] == '.') {
+				trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
+			}
+		}
+
+		// setup injectors -----------------------------------------------------
+		foreach ($this->info_injector as $i => $injector) {
+			if ($injector->checkNeeded($config) !== false) {
+				// remove injector that does not have it's required
+				// elements/attributes present, and is thus not needed.
+				unset($this->info_injector[$i]);
+			}
+		}
+	}
+
+	/**
+	 * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
+	 * separate lists for processing. Format is element[attr1|attr2],element2...
+	 * @warning Although it's largely drawn from TinyMCE's implementation,
+	 *      it is different, and you'll probably have to modify your lists
+	 * @param $list String list to parse
+	 * @param array($allowed_elements, $allowed_attributes)
+	 * @todo Give this its own class, probably static interface
+	 */
+	public function parseTinyMCEAllowedList($list) {
+
+		$list = str_replace(array(' ', "\t"), '', $list);
+
+		$elements = array();
+		$attributes = array();
+
+		$chunks = preg_split('/(,|[\n\r]+)/', $list);
+		foreach ($chunks as $chunk) {
+			if (empty($chunk)) continue;
+			// remove TinyMCE element control characters
+			if (!strpos($chunk, '[')) {
+				$element = $chunk;
+				$attr = false;
+			} else {
+				list($element, $attr) = explode('[', $chunk);
+			}
+			if ($element !== '*') $elements[$element] = true;
+			if (!$attr) continue;
+			$attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
+			$attr = explode('|', $attr);
+			foreach ($attr as $key) {
+				$attributes["$element.$key"] = true;
+			}
+		}
+
+		return array($elements, $attributes);
+
+	}
 
 
 }

Please login to merge, or discard this patch.

Braces: +41 added lines, -15 removed lines

@@ -192,20 +192,32 @@  discard block
 block discarded – undo
 
         foreach ($this->manager->modules as $module) {
             foreach($module->info_tag_transform as $k => $v) {
-                if ($v === false) unset($this->info_tag_transform[$k]);
-                else $this->info_tag_transform[$k] = $v;
+                if ($v === false) {
+                	unset($this->info_tag_transform[$k]);
+                } else {
+                	$this->info_tag_transform[$k] = $v;
+                }
             }
             foreach($module->info_attr_transform_pre as $k => $v) {
-                if ($v === false) unset($this->info_attr_transform_pre[$k]);
-                else $this->info_attr_transform_pre[$k] = $v;
+                if ($v === false) {
+                	unset($this->info_attr_transform_pre[$k]);
+                } else {
+                	$this->info_attr_transform_pre[$k] = $v;
+                }
             }
             foreach($module->info_attr_transform_post as $k => $v) {
-                if ($v === false) unset($this->info_attr_transform_post[$k]);
-                else $this->info_attr_transform_post[$k] = $v;
+                if ($v === false) {
+                	unset($this->info_attr_transform_post[$k]);
+                } else {
+                	$this->info_attr_transform_post[$k] = $v;
+                }
             }
             foreach ($module->info_injector as $k => $v) {
-                if ($v === false) unset($this->info_injector[$k]);
-                else $this->info_injector[$k] = $v;
+                if ($v === false) {
+                	unset($this->info_injector[$k]);
+                } else {
+                	$this->info_injector[$k] = $v;
+                }
             }
         }
 
@@ -256,7 +268,9 @@  discard block
 block discarded – undo
 
         if (is_array($allowed_elements)) {
             foreach ($this->info as $name => $d) {
-                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
+                if(!isset($allowed_elements[$name])) {
+                	unset($this->info[$name]);
+                }
                 unset($allowed_elements[$name]);
             }
             // emit errors
@@ -285,7 +299,9 @@  discard block
 block discarded – undo
                         unset($allowed_attributes_mutable[$key]);
                     }
                 }
-                if ($delete) unset($this->info_global_attr[$attr]);
+                if ($delete) {
+                	unset($this->info_global_attr[$attr]);
+                }
             }
 
             foreach ($this->info as $tag => $info) {
@@ -363,8 +379,12 @@  discard block
 block discarded – undo
             }
         }
         foreach ($forbidden_attributes as $key => $v) {
-            if (strlen($key) < 2) continue;
-            if ($key[0] != '*') continue;
+            if (strlen($key) < 2) {
+            	continue;
+            }
+            if ($key[0] != '*') {
+            	continue;
+            }
             if ($key[1] == '.') {
                 trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
             }
@@ -398,7 +418,9 @@  discard block
 block discarded – undo
 
         $chunks = preg_split('/(,|[\n\r]+)/', $list);
         foreach ($chunks as $chunk) {
-            if (empty($chunk)) continue;
+            if (empty($chunk)) {
+            	continue;
+            }
             // remove TinyMCE element control characters
             if (!strpos($chunk, '[')) {
                 $element = $chunk;
@@ -406,8 +428,12 @@  discard block
 block discarded – undo
             } else {
                 list($element, $attr) = explode('[', $chunk);
             }
-            if ($element !== '*') $elements[$element] = true;
-            if (!$attr) continue;
+            if ($element !== '*') {
+            	$elements[$element] = true;
+            }
+            if (!$attr) {
+            	continue;
+            }
             $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
             $attr = explode('|', $attr);
             foreach ($attr as $key) {

Please log in to merge or discard this patch.

Spacing +4 added lines, -4 removed lines patch added patch discarded remove patch

@@ -191,15 +191,15 @@  discard block
 block discarded – undo
         $this->doctype = $this->manager->doctype;
 
         foreach ($this->manager->modules as $module) {
-            foreach($module->info_tag_transform as $k => $v) {
+            foreach ($module->info_tag_transform as $k => $v) {
                 if ($v === false) unset($this->info_tag_transform[$k]);
                 else $this->info_tag_transform[$k] = $v;
             }
-            foreach($module->info_attr_transform_pre as $k => $v) {
+            foreach ($module->info_attr_transform_pre as $k => $v) {
                 if ($v === false) unset($this->info_attr_transform_pre[$k]);
                 else $this->info_attr_transform_pre[$k] = $v;
             }
-            foreach($module->info_attr_transform_post as $k => $v) {
+            foreach ($module->info_attr_transform_post as $k => $v) {
                 if ($v === false) unset($this->info_attr_transform_post[$k]);
                 else $this->info_attr_transform_post[$k] = $v;
             }
@@ -256,7 +256,7 @@  discard block
 block discarded – undo
 
         if (is_array($allowed_elements)) {
             foreach ($this->info as $name => $d) {
-                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
+                if (!isset($allowed_elements[$name])) unset($this->info[$name]);
                 unset($allowed_elements[$name]);
             }
             // emit errors

Please log in to merge or discard this patch.

classes/security/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php 4 patches

Doc Comments +2 added lines, -1 removed lines patch added patch discarded remove patch

@@ -179,6 +179,7 @@  discard block
 block discarded – undo
     /**
      * Adds a module to the current doctype by first registering it,
      * and then tacking it on to the active doctype
+     * @param HTMLPurifier_HTMLModule $module
      */
     public function addModule($module) {
         $this->registerModule($module);
@@ -325,7 +326,7 @@  discard block
 block discarded – undo
     /**
      * Retrieves a single merged element definition
      * @param $name Name of element
-     * @param $trusted Boolean trusted overriding parameter: set to true
+     * @param boolean $trusted Boolean trusted overriding parameter: set to true
      *                 if you want the full version of an element
      * @return Merged HTMLPurifier_ElementDef
      * @note You may notice that modules are getting iterated over twice (once

Please log in to merge or discard this patch.

Indentation +406 added lines, -406 removed lines patch added patch discarded remove patch

@@ -3,412 +3,412 @@
 block discarded – undo
 class HTMLPurifier_HTMLModuleManager
 {
 
-    /**
-     * Instance of HTMLPurifier_DoctypeRegistry
-     */
-    public $doctypes;
-
-    /**
-     * Instance of current doctype
-     */
-    public $doctype;
-
-    /**
-     * Instance of HTMLPurifier_AttrTypes
-     */
-    public $attrTypes;
-
-    /**
-     * Active instances of modules for the specified doctype are
-     * indexed, by name, in this array.
-     */
-    public $modules = array();
-
-    /**
-     * Array of recognized HTMLPurifier_Module instances, indexed by
-     * module's class name. This array is usually lazy loaded, but a
-     * user can overload a module by pre-emptively registering it.
-     */
-    public $registeredModules = array();
-
-    /**
-     * List of extra modules that were added by the user using addModule().
-     * These get unconditionally merged into the current doctype, whatever
-     * it may be.
-     */
-    public $userModules = array();
-
-    /**
-     * Associative array of element name to list of modules that have
-     * definitions for the element; this array is dynamically filled.
-     */
-    public $elementLookup = array();
-
-    /** List of prefixes we should use for registering small names */
-    public $prefixes = array('HTMLPurifier_HTMLModule_');
-
-    public $contentSets;     /**< Instance of HTMLPurifier_ContentSets */
-    public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
-
-    /** If set to true, unsafe elements and attributes will be allowed */
-    public $trusted = false;
-
-    public function __construct() {
-
-        // editable internal objects
-        $this->attrTypes = new HTMLPurifier_AttrTypes();
-        $this->doctypes  = new HTMLPurifier_DoctypeRegistry();
-
-        // setup basic modules
-        $common = array(
-            'CommonAttributes', 'Text', 'Hypertext', 'List',
-            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
-            'StyleAttribute',
-            // Unsafe:
-            'Scripting', 'Object', 'Forms',
-            // Sorta legacy, but present in strict:
-            'Name',
-        );
-        $transitional = array('Legacy', 'Target', 'Iframe');
-        $xml = array('XMLCommonAttributes');
-        $non_xml = array('NonXMLCommonAttributes');
-
-        // setup basic doctypes
-        $this->doctypes->register(
-            'HTML 4.01 Transitional', false,
-            array_merge($common, $transitional, $non_xml),
-            array('Tidy_Transitional', 'Tidy_Proprietary'),
-            array(),
-            '-//W3C//DTD HTML 4.01 Transitional//EN',
-            'http://www.w3.org/TR/html4/loose.dtd'
-        );
-
-        $this->doctypes->register(
-            'HTML 4.01 Strict', false,
-            array_merge($common, $non_xml),
-            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
-            array(),
-            '-//W3C//DTD HTML 4.01//EN',
-            'http://www.w3.org/TR/html4/strict.dtd'
-        );
-
-        $this->doctypes->register(
-            'XHTML 1.0 Transitional', true,
-            array_merge($common, $transitional, $xml, $non_xml),
-            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
-            array(),
-            '-//W3C//DTD XHTML 1.0 Transitional//EN',
-            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
-        );
-
-        $this->doctypes->register(
-            'XHTML 1.0 Strict', true,
-            array_merge($common, $xml, $non_xml),
-            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
-            array(),
-            '-//W3C//DTD XHTML 1.0 Strict//EN',
-            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
-        );
-
-        $this->doctypes->register(
-            'XHTML 1.1', true,
-            // Iframe is a real XHTML 1.1 module, despite being
-            // "transitional"!
-            array_merge($common, $xml, array('Ruby', 'Iframe')),
-            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
-            array(),
-            '-//W3C//DTD XHTML 1.1//EN',
-            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
-        );
-
-    }
-
-    /**
-     * Registers a module to the recognized module list, useful for
-     * overloading pre-existing modules.
-     * @param $module Mixed: string module name, with or without
-     *                HTMLPurifier_HTMLModule prefix, or instance of
-     *                subclass of HTMLPurifier_HTMLModule.
-     * @param $overload Boolean whether or not to overload previous modules.
-     *                  If this is not set, and you do overload a module,
-     *                  HTML Purifier will complain with a warning.
-     * @note This function will not call autoload, you must instantiate
-     *       (and thus invoke) autoload outside the method.
-     * @note If a string is passed as a module name, different variants
-     *       will be tested in this order:
-     *          - Check for HTMLPurifier_HTMLModule_$name
-     *          - Check all prefixes with $name in order they were added
-     *          - Check for literal object name
-     *          - Throw fatal error
-     *       If your object name collides with an internal class, specify
-     *       your module manually. All modules must have been included
-     *       externally: registerModule will not perform inclusions for you!
-     */
-    public function registerModule($module, $overload = false) {
-        if (is_string($module)) {
-            // attempt to load the module
-            $original_module = $module;
-            $ok = false;
-            foreach ($this->prefixes as $prefix) {
-                $module = $prefix . $original_module;
-                if (class_exists($module)) {
-                    $ok = true;
-                    break;
-                }
-            }
-            if (!$ok) {
-                $module = $original_module;
-                if (!class_exists($module)) {
-                    trigger_error($original_module . ' module does not exist',
-                        E_USER_ERROR);
-                    return;
-                }
-            }
-            $module = new $module();
-        }
-        if (empty($module->name)) {
-            trigger_error('Module instance of ' . get_class($module) . ' must have name');
-            return;
-        }
-        if (!$overload && isset($this->registeredModules[$module->name])) {
-            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
-        }
-        $this->registeredModules[$module->name] = $module;
-    }
-
-    /**
-     * Adds a module to the current doctype by first registering it,
-     * and then tacking it on to the active doctype
-     */
-    public function addModule($module) {
-        $this->registerModule($module);
-        if (is_object($module)) $module = $module->name;
-        $this->userModules[] = $module;
-    }
-
-    /**
-     * Adds a class prefix that registerModule() will use to resolve a
-     * string name to a concrete class
-     */
-    public function addPrefix($prefix) {
-        $this->prefixes[] = $prefix;
-    }
-
-    /**
-     * Performs processing on modules, after being called you may
-     * use getElement() and getElements()
-     * @param $config Instance of HTMLPurifier_Config
-     */
-    public function setup($config) {
-
-        $this->trusted = $config->get('HTML.Trusted');
-
-        // generate
-        $this->doctype = $this->doctypes->make($config);
-        $modules = $this->doctype->modules;
-
-        // take out the default modules that aren't allowed
-        $lookup = $config->get('HTML.AllowedModules');
-        $special_cases = $config->get('HTML.CoreModules');
-
-        if (is_array($lookup)) {
-            foreach ($modules as $k => $m) {
-                if (isset($special_cases[$m])) continue;
-                if (!isset($lookup[$m])) unset($modules[$k]);
-            }
-        }
-
-        // custom modules
-        if ($config->get('HTML.Proprietary')) {
-            $modules[] = 'Proprietary';
-        }
-        if ($config->get('HTML.SafeObject')) {
-            $modules[] = 'SafeObject';
-        }
-        if ($config->get('HTML.SafeEmbed')) {
-            $modules[] = 'SafeEmbed';
-        }
-        if ($config->get('HTML.Nofollow')) {
-            $modules[] = 'Nofollow';
-        }
-        if ($config->get('HTML.TargetBlank')) {
-            $modules[] = 'TargetBlank';
-        }
-
-        // merge in custom modules
-        $modules = array_merge($modules, $this->userModules);
-
-        foreach ($modules as $module) {
-            $this->processModule($module);
-            $this->modules[$module]->setup($config);
-        }
-
-        foreach ($this->doctype->tidyModules as $module) {
-            $this->processModule($module);
-            $this->modules[$module]->setup($config);
-        }
-
-        // prepare any injectors
-        foreach ($this->modules as $module) {
-            $n = array();
-            foreach ($module->info_injector as $i => $injector) {
-                if (!is_object($injector)) {
-                    $class = "HTMLPurifier_Injector_$injector";
-                    $injector = new $class;
-                }
-                $n[$injector->name] = $injector;
-            }
-            $module->info_injector = $n;
-        }
-
-        // setup lookup table based on all valid modules
-        foreach ($this->modules as $module) {
-            foreach ($module->info as $name => $def) {
-                if (!isset($this->elementLookup[$name])) {
-                    $this->elementLookup[$name] = array();
-                }
-                $this->elementLookup[$name][] = $module->name;
-            }
-        }
-
-        // note the different choice
-        $this->contentSets = new HTMLPurifier_ContentSets(
-            // content set assembly deals with all possible modules,
-            // not just ones deemed to be "safe"
-            $this->modules
-        );
-        $this->attrCollections = new HTMLPurifier_AttrCollections(
-            $this->attrTypes,
-            // there is no way to directly disable a global attribute,
-            // but using AllowedAttributes or simply not including
-            // the module in your custom doctype should be sufficient
-            $this->modules
-        );
-    }
-
-    /**
-     * Takes a module and adds it to the active module collection,
-     * registering it if necessary.
-     */
-    public function processModule($module) {
-        if (!isset($this->registeredModules[$module]) || is_object($module)) {
-            $this->registerModule($module);
-        }
-        $this->modules[$module] = $this->registeredModules[$module];
-    }
-
-    /**
-     * Retrieves merged element definitions.
-     * @return Array of HTMLPurifier_ElementDef
-     */
-    public function getElements() {
-
-        $elements = array();
-        foreach ($this->modules as $module) {
-            if (!$this->trusted && !$module->safe) continue;
-            foreach ($module->info as $name => $v) {
-                if (isset($elements[$name])) continue;
-                $elements[$name] = $this->getElement($name);
-            }
-        }
-
-        // remove dud elements, this happens when an element that
-        // appeared to be safe actually wasn't
-        foreach ($elements as $n => $v) {
-            if ($v === false) unset($elements[$n]);
-        }
-
-        return $elements;
-
-    }
-
-    /**
-     * Retrieves a single merged element definition
-     * @param $name Name of element
-     * @param $trusted Boolean trusted overriding parameter: set to true
-     *                 if you want the full version of an element
-     * @return Merged HTMLPurifier_ElementDef
-     * @note You may notice that modules are getting iterated over twice (once
-     *       in getElements() and once here). This
-     *       is because
-     */
-    public function getElement($name, $trusted = null) {
-
-        if (!isset($this->elementLookup[$name])) {
-            return false;
-        }
-
-        // setup global state variables
-        $def = false;
-        if ($trusted === null) $trusted = $this->trusted;
-
-        // iterate through each module that has registered itself to this
-        // element
-        foreach($this->elementLookup[$name] as $module_name) {
-
-            $module = $this->modules[$module_name];
-
-            // refuse to create/merge from a module that is deemed unsafe--
-            // pretend the module doesn't exist--when trusted mode is not on.
-            if (!$trusted && !$module->safe) {
-                continue;
-            }
-
-            // clone is used because, ideally speaking, the original
-            // definition should not be modified. Usually, this will
-            // make no difference, but for consistency's sake
-            $new_def = clone $module->info[$name];
-
-            if (!$def && $new_def->standalone) {
-                $def = $new_def;
-            } elseif ($def) {
-                // This will occur even if $new_def is standalone. In practice,
-                // this will usually result in a full replacement.
-                $def->mergeIn($new_def);
-            } else {
-                // :TODO:
-                // non-standalone definitions that don't have a standalone
-                // to merge into could be deferred to the end
-                // HOWEVER, it is perfectly valid for a non-standalone
-                // definition to lack a standalone definition, even
-                // after all processing: this allows us to safely
-                // specify extra attributes for elements that may not be
-                // enabled all in one place.  In particular, this might
-                // be the case for trusted elements.  WARNING: care must
-                // be taken that the /extra/ definitions are all safe.
-                continue;
-            }
-
-            // attribute value expansions
-            $this->attrCollections->performInclusions($def->attr);
-            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
-
-            // descendants_are_inline, for ChildDef_Chameleon
-            if (is_string($def->content_model) &&
-                strpos($def->content_model, 'Inline') !== false) {
-                if ($name != 'del' && $name != 'ins') {
-                    // this is for you, ins/del
-                    $def->descendants_are_inline = true;
-                }
-            }
-
-            $this->contentSets->generateChildDef($def, $module);
-        }
-
-        // This can occur if there is a blank definition, but no base to
-        // mix it in with
-        if (!$def) return false;
-
-        // add information on required attributes
-        foreach ($def->attr as $attr_name => $attr_def) {
-            if ($attr_def->required) {
-                $def->required_attr[] = $attr_name;
-            }
-        }
-
-        return $def;
-
-    }
+	/**
+	 * Instance of HTMLPurifier_DoctypeRegistry
+	 */
+	public $doctypes;
+
+	/**
+	 * Instance of current doctype
+	 */
+	public $doctype;
+
+	/**
+	 * Instance of HTMLPurifier_AttrTypes
+	 */
+	public $attrTypes;
+
+	/**
+	 * Active instances of modules for the specified doctype are
+	 * indexed, by name, in this array.
+	 */
+	public $modules = array();
+
+	/**
+	 * Array of recognized HTMLPurifier_Module instances, indexed by
+	 * module's class name. This array is usually lazy loaded, but a
+	 * user can overload a module by pre-emptively registering it.
+	 */
+	public $registeredModules = array();
+
+	/**
+	 * List of extra modules that were added by the user using addModule().
+	 * These get unconditionally merged into the current doctype, whatever
+	 * it may be.
+	 */
+	public $userModules = array();
+
+	/**
+	 * Associative array of element name to list of modules that have
+	 * definitions for the element; this array is dynamically filled.
+	 */
+	public $elementLookup = array();
+
+	/** List of prefixes we should use for registering small names */
+	public $prefixes = array('HTMLPurifier_HTMLModule_');
+
+	public $contentSets;     /**< Instance of HTMLPurifier_ContentSets */
+	public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
+
+	/** If set to true, unsafe elements and attributes will be allowed */
+	public $trusted = false;
+
+	public function __construct() {
+
+		// editable internal objects
+		$this->attrTypes = new HTMLPurifier_AttrTypes();
+		$this->doctypes  = new HTMLPurifier_DoctypeRegistry();
+
+		// setup basic modules
+		$common = array(
+			'CommonAttributes', 'Text', 'Hypertext', 'List',
+			'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
+			'StyleAttribute',
+			// Unsafe:
+			'Scripting', 'Object', 'Forms',
+			// Sorta legacy, but present in strict:
+			'Name',
+		);
+		$transitional = array('Legacy', 'Target', 'Iframe');
+		$xml = array('XMLCommonAttributes');
+		$non_xml = array('NonXMLCommonAttributes');
+
+		// setup basic doctypes
+		$this->doctypes->register(
+			'HTML 4.01 Transitional', false,
+			array_merge($common, $transitional, $non_xml),
+			array('Tidy_Transitional', 'Tidy_Proprietary'),
+			array(),
+			'-//W3C//DTD HTML 4.01 Transitional//EN',
+			'http://www.w3.org/TR/html4/loose.dtd'
+		);
+
+		$this->doctypes->register(
+			'HTML 4.01 Strict', false,
+			array_merge($common, $non_xml),
+			array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
+			array(),
+			'-//W3C//DTD HTML 4.01//EN',
+			'http://www.w3.org/TR/html4/strict.dtd'
+		);
+
+		$this->doctypes->register(
+			'XHTML 1.0 Transitional', true,
+			array_merge($common, $transitional, $xml, $non_xml),
+			array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
+			array(),
+			'-//W3C//DTD XHTML 1.0 Transitional//EN',
+			'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
+		);
+
+		$this->doctypes->register(
+			'XHTML 1.0 Strict', true,
+			array_merge($common, $xml, $non_xml),
+			array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
+			array(),
+			'-//W3C//DTD XHTML 1.0 Strict//EN',
+			'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
+		);
+
+		$this->doctypes->register(
+			'XHTML 1.1', true,
+			// Iframe is a real XHTML 1.1 module, despite being
+			// "transitional"!
+			array_merge($common, $xml, array('Ruby', 'Iframe')),
+			array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
+			array(),
+			'-//W3C//DTD XHTML 1.1//EN',
+			'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
+		);
+
+	}
+
+	/**
+	 * Registers a module to the recognized module list, useful for
+	 * overloading pre-existing modules.
+	 * @param $module Mixed: string module name, with or without
+	 *                HTMLPurifier_HTMLModule prefix, or instance of
+	 *                subclass of HTMLPurifier_HTMLModule.
+	 * @param $overload Boolean whether or not to overload previous modules.
+	 *                  If this is not set, and you do overload a module,
+	 *                  HTML Purifier will complain with a warning.
+	 * @note This function will not call autoload, you must instantiate
+	 *       (and thus invoke) autoload outside the method.
+	 * @note If a string is passed as a module name, different variants
+	 *       will be tested in this order:
+	 *          - Check for HTMLPurifier_HTMLModule_$name
+	 *          - Check all prefixes with $name in order they were added
+	 *          - Check for literal object name
+	 *          - Throw fatal error
+	 *       If your object name collides with an internal class, specify
+	 *       your module manually. All modules must have been included
+	 *       externally: registerModule will not perform inclusions for you!
+	 */
+	public function registerModule($module, $overload = false) {
+		if (is_string($module)) {
+			// attempt to load the module
+			$original_module = $module;
+			$ok = false;
+			foreach ($this->prefixes as $prefix) {
+				$module = $prefix . $original_module;
+				if (class_exists($module)) {
+					$ok = true;
+					break;
+				}
+			}
+			if (!$ok) {
+				$module = $original_module;
+				if (!class_exists($module)) {
+					trigger_error($original_module . ' module does not exist',
+						E_USER_ERROR);
+					return;
+				}
+			}
+			$module = new $module();
+		}
+		if (empty($module->name)) {
+			trigger_error('Module instance of ' . get_class($module) . ' must have name');
+			return;
+		}
+		if (!$overload && isset($this->registeredModules[$module->name])) {
+			trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
+		}
+		$this->registeredModules[$module->name] = $module;
+	}
+
+	/**
+	 * Adds a module to the current doctype by first registering it,
+	 * and then tacking it on to the active doctype
+	 */
+	public function addModule($module) {
+		$this->registerModule($module);
+		if (is_object($module)) $module = $module->name;
+		$this->userModules[] = $module;
+	}
+
+	/**
+	 * Adds a class prefix that registerModule() will use to resolve a
+	 * string name to a concrete class
+	 */
+	public function addPrefix($prefix) {
+		$this->prefixes[] = $prefix;
+	}
+
+	/**
+	 * Performs processing on modules, after being called you may
+	 * use getElement() and getElements()
+	 * @param $config Instance of HTMLPurifier_Config
+	 */
+	public function setup($config) {
+
+		$this->trusted = $config->get('HTML.Trusted');
+
+		// generate
+		$this->doctype = $this->doctypes->make($config);
+		$modules = $this->doctype->modules;
+
+		// take out the default modules that aren't allowed
+		$lookup = $config->get('HTML.AllowedModules');
+		$special_cases = $config->get('HTML.CoreModules');
+
+		if (is_array($lookup)) {
+			foreach ($modules as $k => $m) {
+				if (isset($special_cases[$m])) continue;
+				if (!isset($lookup[$m])) unset($modules[$k]);
+			}
+		}
+
+		// custom modules
+		if ($config->get('HTML.Proprietary')) {
+			$modules[] = 'Proprietary';
+		}
+		if ($config->get('HTML.SafeObject')) {
+			$modules[] = 'SafeObject';
+		}
+		if ($config->get('HTML.SafeEmbed')) {
+			$modules[] = 'SafeEmbed';
+		}
+		if ($config->get('HTML.Nofollow')) {
+			$modules[] = 'Nofollow';
+		}
+		if ($config->get('HTML.TargetBlank')) {
+			$modules[] = 'TargetBlank';
+		}
+
+		// merge in custom modules
+		$modules = array_merge($modules, $this->userModules);
+
+		foreach ($modules as $module) {
+			$this->processModule($module);
+			$this->modules[$module]->setup($config);
+		}
+
+		foreach ($this->doctype->tidyModules as $module) {
+			$this->processModule($module);
+			$this->modules[$module]->setup($config);
+		}
+
+		// prepare any injectors
+		foreach ($this->modules as $module) {
+			$n = array();
+			foreach ($module->info_injector as $i => $injector) {
+				if (!is_object($injector)) {
+					$class = "HTMLPurifier_Injector_$injector";
+					$injector = new $class;
+				}
+				$n[$injector->name] = $injector;
+			}
+			$module->info_injector = $n;
+		}
+
+		// setup lookup table based on all valid modules
+		foreach ($this->modules as $module) {
+			foreach ($module->info as $name => $def) {
+				if (!isset($this->elementLookup[$name])) {
+					$this->elementLookup[$name] = array();
+				}
+				$this->elementLookup[$name][] = $module->name;
+			}
+		}
+
+		// note the different choice
+		$this->contentSets = new HTMLPurifier_ContentSets(
+			// content set assembly deals with all possible modules,
+			// not just ones deemed to be "safe"
+			$this->modules
+		);
+		$this->attrCollections = new HTMLPurifier_AttrCollections(
+			$this->attrTypes,
+			// there is no way to directly disable a global attribute,
+			// but using AllowedAttributes or simply not including
+			// the module in your custom doctype should be sufficient
+			$this->modules
+		);
+	}
+
+	/**
+	 * Takes a module and adds it to the active module collection,
+	 * registering it if necessary.
+	 */
+	public function processModule($module) {
+		if (!isset($this->registeredModules[$module]) || is_object($module)) {
+			$this->registerModule($module);
+		}
+		$this->modules[$module] = $this->registeredModules[$module];
+	}
+
+	/**
+	 * Retrieves merged element definitions.
+	 * @return Array of HTMLPurifier_ElementDef
+	 */
+	public function getElements() {
+
+		$elements = array();
+		foreach ($this->modules as $module) {
+			if (!$this->trusted && !$module->safe) continue;
+			foreach ($module->info as $name => $v) {
+				if (isset($elements[$name])) continue;
+				$elements[$name] = $this->getElement($name);
+			}
+		}
+
+		// remove dud elements, this happens when an element that
+		// appeared to be safe actually wasn't
+		foreach ($elements as $n => $v) {
+			if ($v === false) unset($elements[$n]);
+		}
+
+		return $elements;
+
+	}
+
+	/**
+	 * Retrieves a single merged element definition
+	 * @param $name Name of element
+	 * @param $trusted Boolean trusted overriding parameter: set to true
+	 *                 if you want the full version of an element
+	 * @return Merged HTMLPurifier_ElementDef
+	 * @note You may notice that modules are getting iterated over twice (once
+	 *       in getElements() and once here). This
+	 *       is because
+	 */
+	public function getElement($name, $trusted = null) {
+
+		if (!isset($this->elementLookup[$name])) {
+			return false;
+		}
+
+		// setup global state variables
+		$def = false;
+		if ($trusted === null) $trusted = $this->trusted;
+
+		// iterate through each module that has registered itself to this
+		// element
+		foreach($this->elementLookup[$name] as $module_name) {
+
+			$module = $this->modules[$module_name];
+
+			// refuse to create/merge from a module that is deemed unsafe--
+			// pretend the module doesn't exist--when trusted mode is not on.
+			if (!$trusted && !$module->safe) {
+				continue;
+			}
+
+			// clone is used because, ideally speaking, the original
+			// definition should not be modified. Usually, this will
+			// make no difference, but for consistency's sake
+			$new_def = clone $module->info[$name];
+
+			if (!$def && $new_def->standalone) {
+				$def = $new_def;
+			} elseif ($def) {
+				// This will occur even if $new_def is standalone. In practice,
+				// this will usually result in a full replacement.
+				$def->mergeIn($new_def);
+			} else {
+				// :TODO:
+				// non-standalone definitions that don't have a standalone
+				// to merge into could be deferred to the end
+				// HOWEVER, it is perfectly valid for a non-standalone
+				// definition to lack a standalone definition, even
+				// after all processing: this allows us to safely
+				// specify extra attributes for elements that may not be
+				// enabled all in one place.  In particular, this might
+				// be the case for trusted elements.  WARNING: care must
+				// be taken that the /extra/ definitions are all safe.
+				continue;
+			}
+
+			// attribute value expansions
+			$this->attrCollections->performInclusions($def->attr);
+			$this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
+
+			// descendants_are_inline, for ChildDef_Chameleon
+			if (is_string($def->content_model) &&
+				strpos($def->content_model, 'Inline') !== false) {
+				if ($name != 'del' && $name != 'ins') {
+					// this is for you, ins/del
+					$def->descendants_are_inline = true;
+				}
+			}
+
+			$this->contentSets->generateChildDef($def, $module);
+		}
+
+		// This can occur if there is a blank definition, but no base to
+		// mix it in with
+		if (!$def) return false;
+
+		// add information on required attributes
+		foreach ($def->attr as $attr_name => $attr_def) {
+			if ($attr_def->required) {
+				$def->required_attr[] = $attr_name;
+			}
+		}
+
+		return $def;
+
+	}
 
 }
 

Please log in to merge or discard this patch.

Braces +24 added lines, -8 removed lines patch added patch discarded remove patch

@@ -182,7 +182,9 @@  discard block
 block discarded – undo
      */
     public function addModule($module) {
         $this->registerModule($module);
-        if (is_object($module)) $module = $module->name;
+        if (is_object($module)) {
+        	$module = $module->name;
+        }
         $this->userModules[] = $module;
     }
 
@@ -213,8 +215,12 @@  discard block
 block discarded – undo
 
         if (is_array($lookup)) {
             foreach ($modules as $k => $m) {
-                if (isset($special_cases[$m])) continue;
-                if (!isset($lookup[$m])) unset($modules[$k]);
+                if (isset($special_cases[$m])) {
+                	continue;
+                }
+                if (!isset($lookup[$m])) {
+                	unset($modules[$k]);
+                }
             }
         }
 
@@ -305,9 +311,13 @@  discard block
 block discarded – undo
 
         $elements = array();
         foreach ($this->modules as $module) {
-            if (!$this->trusted && !$module->safe) continue;
+            if (!$this->trusted && !$module->safe) {
+            	continue;
+            }
             foreach ($module->info as $name => $v) {
-                if (isset($elements[$name])) continue;
+                if (isset($elements[$name])) {
+                	continue;
+                }
                 $elements[$name] = $this->getElement($name);
             }
         }
@@ -315,7 +325,9 @@  discard block
 block discarded – undo
         // remove dud elements, this happens when an element that
         // appeared to be safe actually wasn't
         foreach ($elements as $n => $v) {
-            if ($v === false) unset($elements[$n]);
+            if ($v === false) {
+            	unset($elements[$n]);
+            }
         }
 
         return $elements;
@@ -340,7 +352,9 @@  discard block
 block discarded – undo
 
         // setup global state variables
         $def = false;
-        if ($trusted === null) $trusted = $this->trusted;
+        if ($trusted === null) {
+        	$trusted = $this->trusted;
+        }
 
         // iterate through each module that has registered itself to this
         // element
@@ -397,7 +411,9 @@  discard block
 block discarded – undo
 
         // This can occur if there is a blank definition, but no base to
         // mix it in with
-        if (!$def) return false;
+        if (!$def) {
+        	return false;
+        }
 
         // add information on required attributes
         foreach ($def->attr as $attr_name => $attr_def) {

Please login to merge, or discard this patch.

Spacing +6 added lines, -6 removed lines patch added patch discarded remove patch

@@ -47,7 +47,7 @@  discard block
 block discarded – undo
     /** List of prefixes we should use for registering small names */
     public $prefixes = array('HTMLPurifier_HTMLModule_');
 
-    public $contentSets;     /**< Instance of HTMLPurifier_ContentSets */
+    public $contentSets; /**< Instance of HTMLPurifier_ContentSets */
     public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
 
     /** If set to true, unsafe elements and attributes will be allowed */
@@ -150,7 +150,7 @@  discard block
 block discarded – undo
             $original_module = $module;
             $ok = false;
             foreach ($this->prefixes as $prefix) {
-                $module = $prefix . $original_module;
+                $module = $prefix.$original_module;
                 if (class_exists($module)) {
                     $ok = true;
                     break;
@@ -159,7 +159,7 @@  discard block
 block discarded – undo
             if (!$ok) {
                 $module = $original_module;
                 if (!class_exists($module)) {
-                    trigger_error($original_module . ' module does not exist',
+                    trigger_error($original_module.' module does not exist',
                         E_USER_ERROR);
                     return;
                 }
@@ -167,11 +167,11 @@  discard block
 block discarded – undo
             $module = new $module();
         }
         if (empty($module->name)) {
-            trigger_error('Module instance of ' . get_class($module) . ' must have name');
+            trigger_error('Module instance of '.get_class($module).' must have name');
             return;
         }
         if (!$overload && isset($this->registeredModules[$module->name])) {
-            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
+            trigger_error('Overloading '.$module->name.' without explicit overload parameter', E_USER_WARNING);
         }
         $this->registeredModules[$module->name] = $module;
     }
@@ -344,7 +344,7 @@  discard block
 block discarded – undo
 
         // iterate through each module that has registered itself to this
         // element
-        foreach($this->elementLookup[$name] as $module_name) {
+        foreach ($this->elementLookup[$name] as $module_name) {
 
             $module = $this->modules[$module_name];
 

Please login to merge, or discard this patch.

classes/security/htmlpurifier/library/HTMLPurifier/IDAccumulator.php 3 patches

Doc Comments +1 added lines, -1 removed lines patch added patch discarded remove patch

@@ -19,7 +19,7 @@
 block discarded – undo
      * Builds an IDAccumulator, also initializing the default blacklist
      * @param $config Instance of HTMLPurifier_Config
      * @param $context Instance of HTMLPurifier_Context
-     * @return Fully initialized HTMLPurifier_IDAccumulator
+     * @return HTMLPurifier_IDAccumulator Fully initialized HTMLPurifier_IDAccumulator
      */
     public static function build($config, $context) {
         $id_accumulator = new HTMLPurifier_IDAccumulator();

Please login to merge, or discard this patch.

Indentation +35 added lines, -35 removed lines patch added patch discarded remove patch

@@ -9,44 +9,44 @@
 block discarded – undo
 class HTMLPurifier_IDAccumulator
 {
 
-    /**
-     * Lookup table of IDs we've accumulated.
-     * @public
-     */
-    public $ids = array();
+	/**
+	 * Lookup table of IDs we've accumulated.
+	 * @public
+	 */
+	public $ids = array();
 
-    /**
-     * Builds an IDAccumulator, also initializing the default blacklist
-     * @param $config Instance of HTMLPurifier_Config
-     * @param $context Instance of HTMLPurifier_Context
-     * @return Fully initialized HTMLPurifier_IDAccumulator
-     */
-    public static function build($config, $context) {
-        $id_accumulator = new HTMLPurifier_IDAccumulator();
-        $id_accumulator->load($config->get('Attr.IDBlacklist'));
-        return $id_accumulator;
-    }
+	/**
+	 * Builds an IDAccumulator, also initializing the default blacklist
+	 * @param $config Instance of HTMLPurifier_Config
+	 * @param $context Instance of HTMLPurifier_Context
+	 * @return Fully initialized HTMLPurifier_IDAccumulator
+	 */
+	public static function build($config, $context) {
+		$id_accumulator = new HTMLPurifier_IDAccumulator();
+		$id_accumulator->load($config->get('Attr.IDBlacklist'));
+		return $id_accumulator;
+	}
 
-    /**
-     * Add an ID to the lookup table.
-     * @param $id ID to be added.
-     * @return Bool status, true if success, false if there's a dupe
-     */
-    public function add($id) {
-        if (isset($this->ids[$id])) return false;
-        return $this->ids[$id] = true;
-    }
+	/**
+	 * Add an ID to the lookup table.
+	 * @param $id ID to be added.
+	 * @return Bool status, true if success, false if there's a dupe
+	 */
+	public function add($id) {
+		if (isset($this->ids[$id])) return false;
+		return $this->ids[$id] = true;
+	}
 
-    /**
-     * Load a list of IDs into the lookup table
-     * @param $array_of_ids Array of IDs to load
-     * @note This function doesn't care about duplicates
-     */
-    public function load($array_of_ids) {
-        foreach ($array_of_ids as $id) {
-            $this->ids[$id] = true;
-        }
-    }
+	/**
+	 * Load a list of IDs into the lookup table
+	 * @param $array_of_ids Array of IDs to load
+	 * @note This function doesn't care about duplicates
+	 */
+	public function load($array_of_ids) {
+		foreach ($array_of_ids as $id) {
+			$this->ids[$id] = true;
+		}
+	}
 
 }
 

Please login to merge, or discard this patch.

Braces +3 added lines, -1 removed lines patch added patch discarded remove patch

@@ -33,7 +33,9 @@
 block discarded – undo
      * @return Bool status, true if success, false if there's a dupe
      */
     public function add($id) {
-        if (isset($this->ids[$id])) return false;
+        if (isset($this->ids[$id])) {
+        	return false;
+        }
         return $this->ids[$id] = true;
     }
 

Please login to merge, or discard this patch.

classes/security/htmlpurifier/library/HTMLPurifier/Injector.php 4 patches

Doc Comments +3 added lines, -2 removed lines patch added patch discarded remove patch

@@ -64,6 +64,7 @@  discard block
 block discarded – undo
      * result in infinite loops if not used carefully.
      * @warning HTML Purifier will prevent you from fast-forwarding with this
      *          function.
+     * @param integer $index
      */
     public function rewind($index) {
         $this->rewind = $index;
@@ -123,8 +124,8 @@  discard block
 block discarded – undo
 
     /**
      * Tests if the context node allows a certain element
-     * @param $name Name of element to test for
-     * @return True if element is allowed, false if it is not
+     * @param string $name Name of element to test for
+     * @return boolean True if element is allowed, false if it is not
      */
     public function allowsElement($name) {
         if (!empty($this->currentNesting)) {

Please login to merge, or discard this patch.

Indentation +216 added lines, -216 removed lines patch added patch discarded remove patch

@@ -16,222 +16,222 @@
 block discarded – undo
 abstract class HTMLPurifier_Injector
 {
 
-    /**
-     * Advisory name of injector, this is for friendly error messages
-     */
-    public $name;
-
-    /**
-     * Instance of HTMLPurifier_HTMLDefinition
-     */
-    protected $htmlDefinition;
-
-    /**
-     * Reference to CurrentNesting variable in Context. This is an array
-     * list of tokens that we are currently "inside"
-     */
-    protected $currentNesting;
-
-    /**
-     * Reference to InputTokens variable in Context. This is an array
-     * list of the input tokens that are being processed.
-     */
-    protected $inputTokens;
-
-    /**
-     * Reference to InputIndex variable in Context. This is an integer
-     * array index for $this->inputTokens that indicates what token
-     * is currently being processed.
-     */
-    protected $inputIndex;
-
-    /**
-     * Array of elements and attributes this injector creates and therefore
-     * need to be allowed by the definition. Takes form of
-     * array('element' => array('attr', 'attr2'), 'element2')
-     */
-    public $needed = array();
-
-    /**
-     * Index of inputTokens to rewind to.
-     */
-    protected $rewind = false;
-
-    /**
-     * Rewind to a spot to re-perform processing. This is useful if you
-     * deleted a node, and now need to see if this change affected any
-     * earlier nodes. Rewinding does not affect other injectors, and can
-     * result in infinite loops if not used carefully.
-     * @warning HTML Purifier will prevent you from fast-forwarding with this
-     *          function.
-     */
-    public function rewind($index) {
-        $this->rewind = $index;
-    }
-
-    /**
-     * Retrieves rewind, and then unsets it.
-     */
-    public function getRewind() {
-        $r = $this->rewind;
-        $this->rewind = false;
-        return $r;
-    }
-
-    /**
-     * Prepares the injector by giving it the config and context objects:
-     * this allows references to important variables to be made within
-     * the injector. This function also checks if the HTML environment
-     * will work with the Injector (see checkNeeded()).
-     * @param $config Instance of HTMLPurifier_Config
-     * @param $context Instance of HTMLPurifier_Context
-     * @return Boolean false if success, string of missing needed element/attribute if failure
-     */
-    public function prepare($config, $context) {
-        $this->htmlDefinition = $config->getHTMLDefinition();
-        // Even though this might fail, some unit tests ignore this and
-        // still test checkNeeded, so be careful. Maybe get rid of that
-        // dependency.
-        $result = $this->checkNeeded($config);
-        if ($result !== false) return $result;
-        $this->currentNesting =& $context->get('CurrentNesting');
-        $this->inputTokens    =& $context->get('InputTokens');
-        $this->inputIndex     =& $context->get('InputIndex');
-        return false;
-    }
-
-    /**
-     * This function checks if the HTML environment
-     * will work with the Injector: if p tags are not allowed, the
-     * Auto-Paragraphing injector should not be enabled.
-     * @param $config Instance of HTMLPurifier_Config
-     * @param $context Instance of HTMLPurifier_Context
-     * @return Boolean false if success, string of missing needed element/attribute if failure
-     */
-    public function checkNeeded($config) {
-        $def = $config->getHTMLDefinition();
-        foreach ($this->needed as $element => $attributes) {
-            if (is_int($element)) $element = $attributes;
-            if (!isset($def->info[$element])) return $element;
-            if (!is_array($attributes)) continue;
-            foreach ($attributes as $name) {
-                if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
-            }
-        }
-        return false;
-    }
-
-    /**
-     * Tests if the context node allows a certain element
-     * @param $name Name of element to test for
-     * @return True if element is allowed, false if it is not
-     */
-    public function allowsElement($name) {
-        if (!empty($this->currentNesting)) {
-            $parent_token = array_pop($this->currentNesting);
-            $this->currentNesting[] = $parent_token;
-            $parent = $this->htmlDefinition->info[$parent_token->name];
-        } else {
-            $parent = $this->htmlDefinition->info_parent_def;
-        }
-        if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
-            return false;
-        }
-        // check for exclusion
-        for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
-            $node = $this->currentNesting[$i];
-            $def  = $this->htmlDefinition->info[$node->name];
-            if (isset($def->excludes[$name])) return false;
-        }
-        return true;
-    }
-
-    /**
-     * Iterator function, which starts with the next token and continues until
-     * you reach the end of the input tokens.
-     * @warning Please prevent previous references from interfering with this
-     *          functions by setting $i = null beforehand!
-     * @param &$i Current integer index variable for inputTokens
-     * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
-     */
-    protected function forward(&$i, &$current) {
-        if ($i === null) $i = $this->inputIndex + 1;
-        else $i++;
-        if (!isset($this->inputTokens[$i])) return false;
-        $current = $this->inputTokens[$i];
-        return true;
-    }
-
-    /**
-     * Similar to _forward, but accepts a third parameter $nesting (which
-     * should be initialized at 0) and stops when we hit the end tag
-     * for the node $this->inputIndex starts in.
-     */
-    protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
-        $result = $this->forward($i, $current);
-        if (!$result) return false;
-        if ($nesting === null) $nesting = 0;
-        if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
-        elseif ($current instanceof HTMLPurifier_Token_End) {
-            if ($nesting <= 0) return false;
-            $nesting--;
-        }
-        return true;
-    }
-
-    /**
-     * Iterator function, starts with the previous token and continues until
-     * you reach the beginning of input tokens.
-     * @warning Please prevent previous references from interfering with this
-     *          functions by setting $i = null beforehand!
-     * @param &$i Current integer index variable for inputTokens
-     * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
-     */
-    protected function backward(&$i, &$current) {
-        if ($i === null) $i = $this->inputIndex - 1;
-        else $i--;
-        if ($i < 0) return false;
-        $current = $this->inputTokens[$i];
-        return true;
-    }
-
-    /**
-     * Initializes the iterator at the current position. Use in a do {} while;
-     * loop to force the _forward and _backward functions to start at the
-     * current location.
-     * @warning Please prevent previous references from interfering with this
-     *          functions by setting $i = null beforehand!
-     * @param &$i Current integer index variable for inputTokens
-     * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
-     */
-    protected function current(&$i, &$current) {
-        if ($i === null) $i = $this->inputIndex;
-        $current = $this->inputTokens[$i];
-    }
-
-    /**
-     * Handler that is called when a text token is processed
-     */
-    public function handleText(&$token) {}
-
-    /**
-     * Handler that is called when a start or empty token is processed
-     */
-    public function handleElement(&$token) {}
-
-    /**
-     * Handler that is called when an end token is processed
-     */
-    public function handleEnd(&$token) {
-        $this->notifyEnd($token);
-    }
-
-    /**
-     * Notifier that is called when an end token is processed
-     * @note This differs from handlers in that the token is read-only
-     * @deprecated
-     */
-    public function notifyEnd($token) {}
+	/**
+	 * Advisory name of injector, this is for friendly error messages
+	 */
+	public $name;
+
+	/**
+	 * Instance of HTMLPurifier_HTMLDefinition
+	 */
+	protected $htmlDefinition;
+
+	/**
+	 * Reference to CurrentNesting variable in Context. This is an array
+	 * list of tokens that we are currently "inside"
+	 */
+	protected $currentNesting;
+
+	/**
+	 * Reference to InputTokens variable in Context. This is an array
+	 * list of the input tokens that are being processed.
+	 */
+	protected $inputTokens;
+
+	/**
+	 * Reference to InputIndex variable in Context. This is an integer
+	 * array index for $this->inputTokens that indicates what token
+	 * is currently being processed.
+	 */
+	protected $inputIndex;
+
+	/**
+	 * Array of elements and attributes this injector creates and therefore
+	 * need to be allowed by the definition. Takes form of
+	 * array('element' => array('attr', 'attr2'), 'element2')
+	 */
+	public $needed = array();
+
+	/**
+	 * Index of inputTokens to rewind to.
+	 */
+	protected $rewind = false;
+
+	/**
+	 * Rewind to a spot to re-perform processing. This is useful if you
+	 * deleted a node, and now need to see if this change affected any
+	 * earlier nodes. Rewinding does not affect other injectors, and can
+	 * result in infinite loops if not used carefully.
+	 * @warning HTML Purifier will prevent you from fast-forwarding with this
+	 *          function.
+	 */
+	public function rewind($index) {
+		$this->rewind = $index;
+	}
+
+	/**
+	 * Retrieves rewind, and then unsets it.
+	 */
+	public function getRewind() {
+		$r = $this->rewind;
+		$this->rewind = false;
+		return $r;
+	}
+
+	/**
+	 * Prepares the injector by giving it the config and context objects:
+	 * this allows references to important variables to be made within
+	 * the injector. This function also checks if the HTML environment
+	 * will work with the Injector (see checkNeeded()).
+	 * @param $config Instance of HTMLPurifier_Config
+	 * @param $context Instance of HTMLPurifier_Context
+	 * @return Boolean false if success, string of missing needed element/attribute if failure
+	 */
+	public function prepare($config, $context) {
+		$this->htmlDefinition = $config->getHTMLDefinition();
+		// Even though this might fail, some unit tests ignore this and
+		// still test checkNeeded, so be careful. Maybe get rid of that
+		// dependency.
+		$result = $this->checkNeeded($config);
+		if ($result !== false) return $result;
+		$this->currentNesting =& $context->get('CurrentNesting');
+		$this->inputTokens    =& $context->get('InputTokens');
+		$this->inputIndex     =& $context->get('InputIndex');
+		return false;
+	}
+
+	/**
+	 * This function checks if the HTML environment
+	 * will work with the Injector: if p tags are not allowed, the
+	 * Auto-Paragraphing injector should not be enabled.
+	 * @param $config Instance of HTMLPurifier_Config
+	 * @param $context Instance of HTMLPurifier_Context
+	 * @return Boolean false if success, string of missing needed element/attribute if failure
+	 */
+	public function checkNeeded($config) {
+		$def = $config->getHTMLDefinition();
+		foreach ($this->needed as $element => $attributes) {
+			if (is_int($element)) $element = $attributes;
+			if (!isset($def->info[$element])) return $element;
+			if (!is_array($attributes)) continue;
+			foreach ($attributes as $name) {
+				if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Tests if the context node allows a certain element
+	 * @param $name Name of element to test for
+	 * @return True if element is allowed, false if it is not
+	 */
+	public function allowsElement($name) {
+		if (!empty($this->currentNesting)) {
+			$parent_token = array_pop($this->currentNesting);
+			$this->currentNesting[] = $parent_token;
+			$parent = $this->htmlDefinition->info[$parent_token->name];
+		} else {
+			$parent = $this->htmlDefinition->info_parent_def;
+		}
+		if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
+			return false;
+		}
+		// check for exclusion
+		for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
+			$node = $this->currentNesting[$i];
+			$def  = $this->htmlDefinition->info[$node->name];
+			if (isset($def->excludes[$name])) return false;
+		}
+		return true;
+	}
+
+	/**
+	 * Iterator function, which starts with the next token and continues until
+	 * you reach the end of the input tokens.
+	 * @warning Please prevent previous references from interfering with this
+	 *          functions by setting $i = null beforehand!
+	 * @param &$i Current integer index variable for inputTokens
+	 * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
+	 */
+	protected function forward(&$i, &$current) {
+		if ($i === null) $i = $this->inputIndex + 1;
+		else $i++;
+		if (!isset($this->inputTokens[$i])) return false;
+		$current = $this->inputTokens[$i];
+		return true;
+	}
+
+	/**
+	 * Similar to _forward, but accepts a third parameter $nesting (which
+	 * should be initialized at 0) and stops when we hit the end tag
+	 * for the node $this->inputIndex starts in.
+	 */
+	protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
+		$result = $this->forward($i, $current);
+		if (!$result) return false;
+		if ($nesting === null) $nesting = 0;
+		if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
+		elseif ($current instanceof HTMLPurifier_Token_End) {
+			if ($nesting <= 0) return false;
+			$nesting--;
+		}
+		return true;
+	}
+
+	/**
+	 * Iterator function, starts with the previous token and continues until
+	 * you reach the beginning of input tokens.
+	 * @warning Please prevent previous references from interfering with this
+	 *          functions by setting $i = null beforehand!
+	 * @param &$i Current integer index variable for inputTokens
+	 * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
+	 */
+	protected function backward(&$i, &$current) {
+		if ($i === null) $i = $this->inputIndex - 1;
+		else $i--;
+		if ($i < 0) return false;
+		$current = $this->inputTokens[$i];
+		return true;
+	}
+
+	/**
+	 * Initializes the iterator at the current position. Use in a do {} while;
+	 * loop to force the _forward and _backward functions to start at the
+	 * current location.
+	 * @warning Please prevent previous references from interfering with this
+	 *          functions by setting $i = null beforehand!
+	 * @param &$i Current integer index variable for inputTokens
+	 * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
+	 */
+	protected function current(&$i, &$current) {
+		if ($i === null) $i = $this->inputIndex;
+		$current = $this->inputTokens[$i];
+	}
+
+	/**
+	 * Handler that is called when a text token is processed
+	 */
+	public function handleText(&$token) {}
+
+	/**
+	 * Handler that is called when a start or empty token is processed
+	 */
+	public function handleElement(&$token) {}
+
+	/**
+	 * Handler that is called when an end token is processed
+	 */
+	public function handleEnd(&$token) {
+		$this->notifyEnd($token);
+	}
+
+	/**
+	 * Notifier that is called when an end token is processed
+	 * @note This differs from handlers in that the token is read-only
+	 * @deprecated
+	 */
+	public function notifyEnd($token) {}
 
 
 }

Please login to merge, or discard this patch.

Spacing +4 added lines, -4 removed lines patch added patch discarded remove patch

@@ -94,9 +94,9 @@  discard block
 block discarded – undo
         // dependency.
         $result = $this->checkNeeded($config);
         if ($result !== false) return $result;
-        $this->currentNesting =& $context->get('CurrentNesting');
-        $this->inputTokens    =& $context->get('InputTokens');
-        $this->inputIndex     =& $context->get('InputIndex');
+        $this->currentNesting = & $context->get('CurrentNesting');
+        $this->inputTokens    = & $context->get('InputTokens');
+        $this->inputIndex     = & $context->get('InputIndex');
         return false;
     }
 
@@ -171,7 +171,7 @@  discard block
 block discarded – undo
         $result = $this->forward($i, $current);
         if (!$result) return false;
         if ($nesting === null) $nesting = 0;
-        if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
+        if ($current instanceof HTMLPurifier_Token_Start) $nesting++;
         elseif ($current instanceof HTMLPurifier_Token_End) {
             if ($nesting <= 0) return false;
             $nesting--;

Please login to merge, or discard this patch.

Braces +49 added lines, -18 removed lines patch added patch discarded remove patch

@@ -93,7 +93,9 @@  discard block
 block discarded – undo
         // still test checkNeeded, so be careful. Maybe get rid of that
         // dependency.
         $result = $this->checkNeeded($config);
-        if ($result !== false) return $result;
+        if ($result !== false) {
+        	return $result;
+        }
         $this->currentNesting =& $context->get('CurrentNesting');
         $this->inputTokens    =& $context->get('InputTokens');
         $this->inputIndex     =& $context->get('InputIndex');
@@ -111,11 +113,19 @@  discard block
 block discarded – undo
     public function checkNeeded($config) {
         $def = $config->getHTMLDefinition();
         foreach ($this->needed as $element => $attributes) {
-            if (is_int($element)) $element = $attributes;
-            if (!isset($def->info[$element])) return $element;
-            if (!is_array($attributes)) continue;
+            if (is_int($element)) {
+            	$element = $attributes;
+            }
+            if (!isset($def->info[$element])) {
+            	return $element;
+            }
+            if (!is_array($attributes)) {
+            	continue;
+            }
             foreach ($attributes as $name) {
-                if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
+                if (!isset($def->info[$element]->attr[$name])) {
+                	return "$element.$name";
+                }
             }
         }
         return false;
@@ -141,7 +151,9 @@  discard block
 block discarded – undo
         for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
             $node = $this->currentNesting[$i];
             $def  = $this->htmlDefinition->info[$node->name];
-            if (isset($def->excludes[$name])) return false;
+            if (isset($def->excludes[$name])) {
+            	return false;
+            }
         }
         return true;
     }
@@ -155,9 +167,14 @@  discard block
 block discarded – undo
      * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
      */
     protected function forward(&$i, &$current) {
-        if ($i === null) $i = $this->inputIndex + 1;
-        else $i++;
-        if (!isset($this->inputTokens[$i])) return false;
+        if ($i === null) {
+        	$i = $this->inputIndex + 1;
+        } else {
+        	$i++;
+        }
+        if (!isset($this->inputTokens[$i])) {
+        	return false;
+        }
         $current = $this->inputTokens[$i];
         return true;
     }
@@ -169,11 +186,18 @@  discard block
 block discarded – undo
      */
     protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
         $result = $this->forward($i, $current);
-        if (!$result) return false;
-        if ($nesting === null) $nesting = 0;
-        if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
-        elseif ($current instanceof HTMLPurifier_Token_End) {
-            if ($nesting <= 0) return false;
+        if (!$result) {
+        	return false;
+        }
+        if ($nesting === null) {
+        	$nesting = 0;
+        }
+        if     ($current instanceof HTMLPurifier_Token_Start) {
+        	$nesting++;
+        } elseif ($current instanceof HTMLPurifier_Token_End) {
+            if ($nesting <= 0) {
+            	return false;
+            }
             $nesting--;
         }
         return true;
@@ -188,9 +212,14 @@  discard block
 block discarded – undo
      * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
      */
     protected function backward(&$i, &$current) {
-        if ($i === null) $i = $this->inputIndex - 1;
-        else $i--;
-        if ($i < 0) return false;
+        if ($i === null) {
+        	$i = $this->inputIndex - 1;
+        } else {
+        	$i--;
+        }
+        if ($i < 0) {
+        	return false;
+        }
         $current = $this->inputTokens[$i];
         return true;
     }
@@ -205,7 +234,9 @@  discard block
 block discarded – undo
      * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
      */
     protected function current(&$i, &$current) {
-        if ($i === null) $i = $this->inputIndex;
+        if ($i === null) {
+        	$i = $this->inputIndex;
+        }
         $current = $this->inputTokens[$i];
     }
 

Please login to merge, or discard this patch.

classes/security/htmlpurifier/library/HTMLPurifier/Language.php 4 patches

Doc Comments +1 added lines, -1 removed lines patch added patch discarded remove patch

@@ -66,7 +66,7 @@
 block discarded – undo
 
     /**
      * Retrieves a localised message.
-     * @param $key string identifier of message
+     * @param string $key string identifier of message
      * @return string localised message
      */
     public function getMessage($key) {

Please login to merge, or discard this patch.

Indentation +150 added lines, -150 removed lines patch added patch discarded remove patch

@@ -7,156 +7,156 @@
 block discarded – undo
 class HTMLPurifier_Language
 {
 
-    /**
-     * ISO 639 language code of language. Prefers shortest possible version
-     */
-    public $code = 'en';
-
-    /**
-     * Fallback language code
-     */
-    public $fallback = false;
-
-    /**
-     * Array of localizable messages
-     */
-    public $messages = array();
-
-    /**
-     * Array of localizable error codes
-     */
-    public $errorNames = array();
-
-    /**
-     * True if no message file was found for this language, so English
-     * is being used instead. Check this if you'd like to notify the
-     * user that they've used a non-supported language.
-     */
-    public $error = false;
-
-    /**
-     * Has the language object been loaded yet?
-     * @todo Make it private, fix usage in HTMLPurifier_LanguageTest
-     */
-    public $_loaded = false;
-
-    /**
-     * Instances of HTMLPurifier_Config and HTMLPurifier_Context
-     */
-    protected $config, $context;
-
-    public function __construct($config, $context) {
-        $this->config  = $config;
-        $this->context = $context;
-    }
-
-    /**
-     * Loads language object with necessary info from factory cache
-     * @note This is a lazy loader
-     */
-    public function load() {
-        if ($this->_loaded) return;
-        $factory = HTMLPurifier_LanguageFactory::instance();
-        $factory->loadLanguage($this->code);
-        foreach ($factory->keys as $key) {
-            $this->$key = $factory->cache[$this->code][$key];
-        }
-        $this->_loaded = true;
-    }
-
-    /**
-     * Retrieves a localised message.
-     * @param $key string identifier of message
-     * @return string localised message
-     */
-    public function getMessage($key) {
-        if (!$this->_loaded) $this->load();
-        if (!isset($this->messages[$key])) return "[$key]";
-        return $this->messages[$key];
-    }
-
-    /**
-     * Retrieves a localised error name.
-     * @param $int integer error number, corresponding to PHP's error
-     *             reporting
-     * @return string localised message
-     */
-    public function getErrorName($int) {
-        if (!$this->_loaded) $this->load();
-        if (!isset($this->errorNames[$int])) return "[Error: $int]";
-        return $this->errorNames[$int];
-    }
-
-    /**
-     * Converts an array list into a string readable representation
-     */
-    public function listify($array) {
-        $sep      = $this->getMessage('Item separator');
-        $sep_last = $this->getMessage('Item separator last');
-        $ret = '';
-        for ($i = 0, $c = count($array); $i < $c; $i++) {
-            if ($i == 0) {
-            } elseif ($i + 1 < $c) {
-                $ret .= $sep;
-            } else {
-                $ret .= $sep_last;
-            }
-            $ret .= $array[$i];
-        }
-        return $ret;
-    }
-
-    /**
-     * Formats a localised message with passed parameters
-     * @param $key string identifier of message
-     * @param $args Parameters to substitute in
-     * @return string localised message
-     * @todo Implement conditionals? Right now, some messages make
-     *     reference to line numbers, but those aren't always available
-     */
-    public function formatMessage($key, $args = array()) {
-        if (!$this->_loaded) $this->load();
-        if (!isset($this->messages[$key])) return "[$key]";
-        $raw = $this->messages[$key];
-        $subst = array();
-        $generator = false;
-        foreach ($args as $i => $value) {
-            if (is_object($value)) {
-                if ($value instanceof HTMLPurifier_Token) {
-                    // factor this out some time
-                    if (!$generator) $generator = $this->context->get('Generator');
-                    if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
-                    if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
-                    $subst['$'.$i.'.Compact'] =
-                    $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
-                    // a more complex algorithm for compact representation
-                    // could be introduced for all types of tokens. This
-                    // may need to be factored out into a dedicated class
-                    if (!empty($value->attr)) {
-                        $stripped_token = clone $value;
-                        $stripped_token->attr = array();
-                        $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
-                    }
-                    $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
-                }
-                continue;
-            } elseif (is_array($value)) {
-                $keys = array_keys($value);
-                if (array_keys($keys) === $keys) {
-                    // list
-                    $subst['$'.$i] = $this->listify($value);
-                } else {
-                    // associative array
-                    // no $i implementation yet, sorry
-                    $subst['$'.$i.'.Keys'] = $this->listify($keys);
-                    $subst['$'.$i.'.Values'] = $this->listify(array_values($value));
-                }
-                continue;
-            }
-            $subst['$' . $i] = $value;
-        }
-        return strtr($raw, $subst);
-    }
+	/**
+	 * ISO 639 language code of language. Prefers shortest possible version
+	 */
+	public $code = 'en';
+
+	/**
+	 * Fallback language code
+	 */
+	public $fallback = false;
+
+	/**
+	 * Array of localizable messages
+	 */
+	public $messages = array();
+
+	/**
+	 * Array of localizable error codes
+	 */
+	public $errorNames = array();
+
+	/**
+	 * True if no message file was found for this language, so English
+	 * is being used instead. Check this if you'd like to notify the
+	 * user that they've used a non-supported language.
+	 */
+	public $error = false;
+
+	/**
+	 * Has the language object been loaded yet?
+	 * @todo Make it private, fix usage in HTMLPurifier_LanguageTest
+	 */
+	public $_loaded = false;
+
+	/**
+	 * Instances of HTMLPurifier_Config and HTMLPurifier_Context
+	 */
+	protected $config, $context;
+
+	public function __construct($config, $context) {
+		$this->config  = $config;
+		$this->context = $context;
+	}
+
+	/**
+	 * Loads language object with necessary info from factory cache
+	 * @note This is a lazy loader
+	 */
+	public function load() {
+		if ($this->_loaded) return;
+		$factory = HTMLPurifier_LanguageFactory::instance();
+		$factory->loadLanguage($this->code);
+		foreach ($factory->keys as $key) {
+			$this->$key = $factory->cache[$this->code][$key];
+		}
+		$this->_loaded = true;
+	}
+
+	/**
+	 * Retrieves a localised message.
+	 * @param $key string identifier of message
+	 * @return string localised message
+	 */
+	public function getMessage($key) {
+		if (!$this->_loaded) $this->load();
+		if (!isset($this->messages[$key])) return "[$key]";
+		return $this->messages[$key];
+	}
+
+	/**
+	 * Retrieves a localised error name.
+	 * @param $int integer error number, corresponding to PHP's error
+	 *             reporting
+	 * @return string localised message
+	 */
+	public function getErrorName($int) {
+		if (!$this->_loaded) $this->load();
+		if (!isset($this->errorNames[$int])) return "[Error: $int]";
+		return $this->errorNames[$int];
+	}
+
+	/**
+	 * Converts an array list into a string readable representation
+	 */
+	public function listify($array) {
+		$sep      = $this->getMessage('Item separator');
+		$sep_last = $this->getMessage('Item separator last');
+		$ret = '';
+		for ($i = 0, $c = count($array); $i < $c; $i++) {
+			if ($i == 0) {
+			} elseif ($i + 1 < $c) {
+				$ret .= $sep;
+			} else {
+				$ret .= $sep_last;
+			}
+			$ret .= $array[$i];
+		}
+		return $ret;
+	}
+
+	/**
+	 * Formats a localised message with passed parameters
+	 * @param $key string identifier of message
+	 * @param $args Parameters to substitute in
+	 * @return string localised message
+	 * @todo Implement conditionals? Right now, some messages make
+	 *     reference to line numbers, but those aren't always available
+	 */
+	public function formatMessage($key, $args = array()) {
+		if (!$this->_loaded) $this->load();
+		if (!isset($this->messages[$key])) return "[$key]";
+		$raw = $this->messages[$key];
+		$subst = array();
+		$generator = false;
+		foreach ($args as $i => $value) {
+			if (is_object($value)) {
+				if ($value instanceof HTMLPurifier_Token) {
+					// factor this out some time
+					if (!$generator) $generator = $this->context->get('Generator');
+					if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
+					if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
+					$subst['$'.$i.'.Compact'] =
+					$subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
+					// a more complex algorithm for compact representation
+					// could be introduced for all types of tokens. This
+					// may need to be factored out into a dedicated class
+					if (!empty($value->attr)) {
+						$stripped_token = clone $value;
+						$stripped_token->attr = array();
+						$subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
+					}
+					$subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
+				}
+				continue;
+			} elseif (is_array($value)) {
+				$keys = array_keys($value);
+				if (array_keys($keys) === $keys) {
+					// list
+					$subst['$'.$i] = $this->listify($value);
+				} else {
+					// associative array
+					// no $i implementation yet, sorry
+					$subst['$'.$i.'.Keys'] = $this->listify($keys);
+					$subst['$'.$i.'.Values'] = $this->listify(array_values($value));
+				}
+				continue;
+			}
+			$subst['$' . $i] = $value;
+		}
+		return strtr($raw, $subst);
+	}
 
 }
 

Please login to merge, or discard this patch.

Braces +30 added lines, -10 removed lines patch added patch discarded remove patch

@@ -55,7 +55,9 @@  discard block
 block discarded – undo
      * @note This is a lazy loader
      */
     public function load() {
-        if ($this->_loaded) return;
+        if ($this->_loaded) {
+        	return;
+        }
         $factory = HTMLPurifier_LanguageFactory::instance();
         $factory->loadLanguage($this->code);
         foreach ($factory->keys as $key) {
@@ -70,8 +72,12 @@  discard block
 block discarded – undo
      * @return string localised message
      */
     public function getMessage($key) {
-        if (!$this->_loaded) $this->load();
-        if (!isset($this->messages[$key])) return "[$key]";
+        if (!$this->_loaded) {
+        	$this->load();
+        }
+        if (!isset($this->messages[$key])) {
+        	return "[$key]";
+        }
         return $this->messages[$key];
     }
 
@@ -82,8 +88,12 @@  discard block
 block discarded – undo
      * @return string localised message
      */
     public function getErrorName($int) {
-        if (!$this->_loaded) $this->load();
-        if (!isset($this->errorNames[$int])) return "[Error: $int]";
+        if (!$this->_loaded) {
+        	$this->load();
+        }
+        if (!isset($this->errorNames[$int])) {
+        	return "[Error: $int]";
+        }
         return $this->errorNames[$int];
     }
 
@@ -115,8 +125,12 @@  discard block
 block discarded – undo
      *     reference to line numbers, but those aren't always available
      */
     public function formatMessage($key, $args = array()) {
-        if (!$this->_loaded) $this->load();
-        if (!isset($this->messages[$key])) return "[$key]";
+        if (!$this->_loaded) {
+        	$this->load();
+        }
+        if (!isset($this->messages[$key])) {
+        	return "[$key]";
+        }
         $raw = $this->messages[$key];
         $subst = array();
         $generator = false;
@@ -124,9 +138,15 @@  discard block
 block discarded – undo
             if (is_object($value)) {
                 if ($value instanceof HTMLPurifier_Token) {
                     // factor this out some time
-                    if (!$generator) $generator = $this->context->get('Generator');
-                    if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
-                    if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
+                    if (!$generator) {
+                    	$generator = $this->context->get('Generator');
+                    }
+                    if (isset($value->name)) {
+                    	$subst['$'.$i.'.Name'] = $value->name;
+                    }
+                    if (isset($value->data)) {
+                    	$subst['$'.$i.'.Data'] = $value->data;
+                    }
                     $subst['$'.$i.'.Compact'] =
                     $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
                     // a more complex algorithm for compact representation

Please login to merge, or discard this patch.

Spacing +1 added lines, -1 removed lines patch added patch discarded remove patch

@@ -153,7 +153,7 @@
 block discarded – undo
                 }
                 continue;
             }
-            $subst['$' . $i] = $value;
+            $subst['$'.$i] = $value;
         }
         return strtr($raw, $subst);
     }

Please login to merge, or discard this patch.

classes/security/htmlpurifier/library/HTMLPurifier/Lexer/DirectLex.php 4 patches

Doc Comments +4 added lines, -1 removed lines patch added patch discarded remove patch

@@ -319,6 +319,9 @@  discard block
 block discarded – undo
 
     /**
      * PHP 5.0.x compatible substr_count that implements offset and length
+     * @param string $needle
+     * @param integer $offset
+     * @param integer $length
      */
     protected function substrCount($haystack, $needle, $offset, $length) {
         static $oldVersion;
@@ -336,7 +339,7 @@  discard block
 block discarded – undo
     /**
      * Takes the inside of an HTML tag and makes an assoc array of attributes.
      *
-     * @param $string Inside of tag excluding name.
+     * @param string $string Inside of tag excluding name.
      * @returns Assoc array of attributes.
      */
     public function parseAttributeString($string, $config, $context) {

Please login to merge, or discard this patch.

Indentation +471 added lines, -471 removed lines patch added patch discarded remove patch

@@ -13,477 +13,477 @@
 block discarded – undo
 class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
 {
 
-    public $tracksLineNumbers = true;
-
-    /**
-     * Whitespace characters for str(c)spn.
-     */
-    protected $_whitespace = "\x20\x09\x0D\x0A";
-
-    /**
-     * Callback function for script CDATA fudge
-     * @param $matches, in form of array(opening tag, contents, closing tag)
-     */
-    protected function scriptCallback($matches) {
-        return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false) . $matches[3];
-    }
-
-    public function tokenizeHTML($html, $config, $context) {
-
-        // special normalization for script tags without any armor
-        // our "armor" heurstic is a < sign any number of whitespaces after
-        // the first script tag
-        if ($config->get('HTML.Trusted')) {
-            $html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
-                array($this, 'scriptCallback'), $html);
-        }
-
-        $html = $this->normalize($html, $config, $context);
-
-        $cursor = 0; // our location in the text
-        $inside_tag = false; // whether or not we're parsing the inside of a tag
-        $array = array(); // result array
-
-        // This is also treated to mean maintain *column* numbers too
-        $maintain_line_numbers = $config->get('Core.MaintainLineNumbers');
-
-        if ($maintain_line_numbers === null) {
-            // automatically determine line numbering by checking
-            // if error collection is on
-            $maintain_line_numbers = $config->get('Core.CollectErrors');
-        }
-
-        if ($maintain_line_numbers) {
-            $current_line = 1;
-            $current_col  = 0;
-            $length = strlen($html);
-        } else {
-            $current_line = false;
-            $current_col  = false;
-            $length = false;
-        }
-        $context->register('CurrentLine', $current_line);
-        $context->register('CurrentCol',  $current_col);
-        $nl = "\n";
-        // how often to manually recalculate. This will ALWAYS be right,
-        // but it's pretty wasteful. Set to 0 to turn off
-        $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval');
-
-        $e = false;
-        if ($config->get('Core.CollectErrors')) {
-            $e =& $context->get('ErrorCollector');
-        }
-
-        // for testing synchronization
-        $loops = 0;
-
-        while(++$loops) {
-
-            // $cursor is either at the start of a token, or inside of
-            // a tag (i.e. there was a < immediately before it), as indicated
-            // by $inside_tag
-
-            if ($maintain_line_numbers) {
-
-                // $rcursor, however, is always at the start of a token.
-                $rcursor = $cursor - (int) $inside_tag;
-
-                // Column number is cheap, so we calculate it every round.
-                // We're interested at the *end* of the newline string, so
-                // we need to add strlen($nl) == 1 to $nl_pos before subtracting it
-                // from our "rcursor" position.
-                $nl_pos = strrpos($html, $nl, $rcursor - $length);
-                $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
-
-                // recalculate lines
-                if (
-                    $synchronize_interval &&  // synchronization is on
-                    $cursor > 0 &&            // cursor is further than zero
-                    $loops % $synchronize_interval === 0 // time to synchronize!
-                ) {
-                    $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
-                }
-
-            }
-
-            $position_next_lt = strpos($html, '<', $cursor);
-            $position_next_gt = strpos($html, '>', $cursor);
-
-            // triggers on "<b>asdf</b>" but not "asdf <b></b>"
-            // special case to set up context
-            if ($position_next_lt === $cursor) {
-                $inside_tag = true;
-                $cursor++;
-            }
-
-            if (!$inside_tag && $position_next_lt !== false) {
-                // We are not inside tag and there still is another tag to parse
-                $token = new
-                    HTMLPurifier_Token_Text(
-                        $this->parseData(
-                            substr(
-                                $html, $cursor, $position_next_lt - $cursor
-                            )
-                        )
-                    );
-                if ($maintain_line_numbers) {
-                    $token->rawPosition($current_line, $current_col);
-                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
-                }
-                $array[] = $token;
-                $cursor  = $position_next_lt + 1;
-                $inside_tag = true;
-                continue;
-            } elseif (!$inside_tag) {
-                // We are not inside tag but there are no more tags
-                // If we're already at the end, break
-                if ($cursor === strlen($html)) break;
-                // Create Text of rest of string
-                $token = new
-                    HTMLPurifier_Token_Text(
-                        $this->parseData(
-                            substr(
-                                $html, $cursor
-                            )
-                        )
-                    );
-                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
-                $array[] = $token;
-                break;
-            } elseif ($inside_tag && $position_next_gt !== false) {
-                // We are in tag and it is well formed
-                // Grab the internals of the tag
-                $strlen_segment = $position_next_gt - $cursor;
-
-                if ($strlen_segment < 1) {
-                    // there's nothing to process!
-                    $token = new HTMLPurifier_Token_Text('<');
-                    $cursor++;
-                    continue;
-                }
-
-                $segment = substr($html, $cursor, $strlen_segment);
-
-                if ($segment === false) {
-                    // somehow, we attempted to access beyond the end of
-                    // the string, defense-in-depth, reported by Nate Abele
-                    break;
-                }
-
-                // Check if it's a comment
-                if (
-                    substr($segment, 0, 3) === '!--'
-                ) {
-                    // re-determine segment length, looking for -->
-                    $position_comment_end = strpos($html, '-->', $cursor);
-                    if ($position_comment_end === false) {
-                        // uh oh, we have a comment that extends to
-                        // infinity. Can't be helped: set comment
-                        // end position to end of string
-                        if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
-                        $position_comment_end = strlen($html);
-                        $end = true;
-                    } else {
-                        $end = false;
-                    }
-                    $strlen_segment = $position_comment_end - $cursor;
-                    $segment = substr($html, $cursor, $strlen_segment);
-                    $token = new
-                        HTMLPurifier_Token_Comment(
-                            substr(
-                                $segment, 3, $strlen_segment - 3
-                            )
-                        );
-                    if ($maintain_line_numbers) {
-                        $token->rawPosition($current_line, $current_col);
-                        $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
-                    }
-                    $array[] = $token;
-                    $cursor = $end ? $position_comment_end : $position_comment_end + 3;
-                    $inside_tag = false;
-                    continue;
-                }
-
-                // Check if it's an end tag
-                $is_end_tag = (strpos($segment,'/') === 0);
-                if ($is_end_tag) {
-                    $type = substr($segment, 1);
-                    $token = new HTMLPurifier_Token_End($type);
-                    if ($maintain_line_numbers) {
-                        $token->rawPosition($current_line, $current_col);
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
-                    }
-                    $array[] = $token;
-                    $inside_tag = false;
-                    $cursor = $position_next_gt + 1;
-                    continue;
-                }
-
-                // Check leading character is alnum, if not, we may
-                // have accidently grabbed an emoticon. Translate into
-                // text and go our merry way
-                if (!ctype_alpha($segment[0])) {
-                    // XML:  $segment[0] !== '_' && $segment[0] !== ':'
-                    if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
-                    $token = new HTMLPurifier_Token_Text('<');
-                    if ($maintain_line_numbers) {
-                        $token->rawPosition($current_line, $current_col);
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
-                    }
-                    $array[] = $token;
-                    $inside_tag = false;
-                    continue;
-                }
-
-                // Check if it is explicitly self closing, if so, remove
-                // trailing slash. Remember, we could have a tag like <br>, so
-                // any later token processing scripts must convert improperly
-                // classified EmptyTags from StartTags.
-                $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
-                if ($is_self_closing) {
-                    $strlen_segment--;
-                    $segment = substr($segment, 0, $strlen_segment);
-                }
-
-                // Check if there are any attributes
-                $position_first_space = strcspn($segment, $this->_whitespace);
-
-                if ($position_first_space >= $strlen_segment) {
-                    if ($is_self_closing) {
-                        $token = new HTMLPurifier_Token_Empty($segment);
-                    } else {
-                        $token = new HTMLPurifier_Token_Start($segment);
-                    }
-                    if ($maintain_line_numbers) {
-                        $token->rawPosition($current_line, $current_col);
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
-                    }
-                    $array[] = $token;
-                    $inside_tag = false;
-                    $cursor = $position_next_gt + 1;
-                    continue;
-                }
-
-                // Grab out all the data
-                $type = substr($segment, 0, $position_first_space);
-                $attribute_string =
-                    trim(
-                        substr(
-                            $segment, $position_first_space
-                        )
-                    );
-                if ($attribute_string) {
-                    $attr = $this->parseAttributeString(
-                                    $attribute_string
-                                  , $config, $context
-                              );
-                } else {
-                    $attr = array();
-                }
-
-                if ($is_self_closing) {
-                    $token = new HTMLPurifier_Token_Empty($type, $attr);
-                } else {
-                    $token = new HTMLPurifier_Token_Start($type, $attr);
-                }
-                if ($maintain_line_numbers) {
-                    $token->rawPosition($current_line, $current_col);
-                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
-                }
-                $array[] = $token;
-                $cursor = $position_next_gt + 1;
-                $inside_tag = false;
-                continue;
-            } else {
-                // inside tag, but there's no ending > sign
-                if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
-                $token = new
-                    HTMLPurifier_Token_Text(
-                        '<' .
-                        $this->parseData(
-                            substr($html, $cursor)
-                        )
-                    );
-                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
-                // no cursor scroll? Hmm...
-                $array[] = $token;
-                break;
-            }
-            break;
-        }
-
-        $context->destroy('CurrentLine');
-        $context->destroy('CurrentCol');
-        return $array;
-    }
-
-    /**
-     * PHP 5.0.x compatible substr_count that implements offset and length
-     */
-    protected function substrCount($haystack, $needle, $offset, $length) {
-        static $oldVersion;
-        if ($oldVersion === null) {
-            $oldVersion = version_compare(PHP_VERSION, '5.1', '<');
-        }
-        if ($oldVersion) {
-            $haystack = substr($haystack, $offset, $length);
-            return substr_count($haystack, $needle);
-        } else {
-            return substr_count($haystack, $needle, $offset, $length);
-        }
-    }
-
-    /**
-     * Takes the inside of an HTML tag and makes an assoc array of attributes.
-     *
-     * @param $string Inside of tag excluding name.
-     * @returns Assoc array of attributes.
-     */
-    public function parseAttributeString($string, $config, $context) {
-        $string = (string) $string; // quick typecast
-
-        if ($string == '') return array(); // no attributes
-
-        $e = false;
-        if ($config->get('Core.CollectErrors')) {
-            $e =& $context->get('ErrorCollector');
-        }
-
-        // let's see if we can abort as quickly as possible
-        // one equal sign, no spaces => one attribute
-        $num_equal = substr_count($string, '=');
-        $has_space = strpos($string, ' ');
-        if ($num_equal === 0 && !$has_space) {
-            // bool attribute
-            return array($string => $string);
-        } elseif ($num_equal === 1 && !$has_space) {
-            // only one attribute
-            list($key, $quoted_value) = explode('=', $string);
-            $quoted_value = trim($quoted_value);
-            if (!$key) {
-                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
-                return array();
-            }
-            if (!$quoted_value) return array($key => '');
-            $first_char = @$quoted_value[0];
-            $last_char  = @$quoted_value[strlen($quoted_value)-1];
-
-            $same_quote = ($first_char == $last_char);
-            $open_quote = ($first_char == '"' || $first_char == "'");
-
-            if ( $same_quote && $open_quote) {
-                // well behaved
-                $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
-            } else {
-                // not well behaved
-                if ($open_quote) {
-                    if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
-                    $value = substr($quoted_value, 1);
-                } else {
-                    $value = $quoted_value;
-                }
-            }
-            if ($value === false) $value = '';
-            return array($key => $this->parseData($value));
-        }
-
-        // setup loop environment
-        $array  = array(); // return assoc array of attributes
-        $cursor = 0; // current position in string (moves forward)
-        $size   = strlen($string); // size of the string (stays the same)
-
-        // if we have unquoted attributes, the parser expects a terminating
-        // space, so let's guarantee that there's always a terminating space.
-        $string .= ' ';
-
-        while(true) {
-
-            if ($cursor >= $size) {
-                break;
-            }
-
-            $cursor += ($value = strspn($string, $this->_whitespace, $cursor));
-            // grab the key
-
-            $key_begin = $cursor; //we're currently at the start of the key
-
-            // scroll past all characters that are the key (not whitespace or =)
-            $cursor += strcspn($string, $this->_whitespace . '=', $cursor);
-
-            $key_end = $cursor; // now at the end of the key
-
-            $key = substr($string, $key_begin, $key_end - $key_begin);
-
-            if (!$key) {
-                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
-                $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
-                continue; // empty key
-            }
-
-            // scroll past all whitespace
-            $cursor += strspn($string, $this->_whitespace, $cursor);
-
-            if ($cursor >= $size) {
-                $array[$key] = $key;
-                break;
-            }
-
-            // if the next character is an equal sign, we've got a regular
-            // pair, otherwise, it's a bool attribute
-            $first_char = @$string[$cursor];
-
-            if ($first_char == '=') {
-                // key="value"
-
-                $cursor++;
-                $cursor += strspn($string, $this->_whitespace, $cursor);
-
-                if ($cursor === false) {
-                    $array[$key] = '';
-                    break;
-                }
-
-                // we might be in front of a quote right now
-
-                $char = @$string[$cursor];
-
-                if ($char == '"' || $char == "'") {
-                    // it's quoted, end bound is $char
-                    $cursor++;
-                    $value_begin = $cursor;
-                    $cursor = strpos($string, $char, $cursor);
-                    $value_end = $cursor;
-                } else {
-                    // it's not quoted, end bound is whitespace
-                    $value_begin = $cursor;
-                    $cursor += strcspn($string, $this->_whitespace, $cursor);
-                    $value_end = $cursor;
-                }
-
-                // we reached a premature end
-                if ($cursor === false) {
-                    $cursor = $size;
-                    $value_end = $cursor;
-                }
-
-                $value = substr($string, $value_begin, $value_end - $value_begin);
-                if ($value === false) $value = '';
-                $array[$key] = $this->parseData($value);
-                $cursor++;
-
-            } else {
-                // boolattr
-                if ($key !== '') {
-                    $array[$key] = $key;
-                } else {
-                    // purely theoretical
-                    if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
-                }
-
-            }
-        }
-        return $array;
-    }
+	public $tracksLineNumbers = true;
+
+	/**
+	 * Whitespace characters for str(c)spn.
+	 */
+	protected $_whitespace = "\x20\x09\x0D\x0A";
+
+	/**
+	 * Callback function for script CDATA fudge
+	 * @param $matches, in form of array(opening tag, contents, closing tag)
+	 */
+	protected function scriptCallback($matches) {
+		return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false) . $matches[3];
+	}
+
+	public function tokenizeHTML($html, $config, $context) {
+
+		// special normalization for script tags without any armor
+		// our "armor" heuristic is a < sign any number of whitespaces after
+		// the first script tag
+		if ($config->get('HTML.Trusted')) {
+			$html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
+				array($this, 'scriptCallback'), $html);
+		}
+
+		$html = $this->normalize($html, $config, $context);
+
+		$cursor = 0; // our location in the text
+		$inside_tag = false; // whether or not we're parsing the inside of a tag
+		$array = array(); // result array
+
+		// This is also treated to mean maintain *column* numbers too
+		$maintain_line_numbers = $config->get('Core.MaintainLineNumbers');
+
+		if ($maintain_line_numbers === null) {
+			// automatically determine line numbering by checking
+			// if error collection is on
+			$maintain_line_numbers = $config->get('Core.CollectErrors');
+		}
+
+		if ($maintain_line_numbers) {
+			$current_line = 1;
+			$current_col  = 0;
+			$length = strlen($html);
+		} else {
+			$current_line = false;
+			$current_col  = false;
+			$length = false;
+		}
+		$context->register('CurrentLine', $current_line);
+		$context->register('CurrentCol',  $current_col);
+		$nl = "\n";
+		// how often to manually recalculate. This will ALWAYS be right,
+		// but it's pretty wasteful. Set to 0 to turn off
+		$synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval');
+
+		$e = false;
+		if ($config->get('Core.CollectErrors')) {
+			$e =& $context->get('ErrorCollector');
+		}
+
+		// for testing synchronization
+		$loops = 0;
+
+		while(++$loops) {
+
+			// $cursor is either at the start of a token, or inside of
+			// a tag (i.e. there was a < immediately before it), as indicated
+			// by $inside_tag
+
+			if ($maintain_line_numbers) {
+
+				// $rcursor, however, is always at the start of a token.
+				$rcursor = $cursor - (int) $inside_tag;
+
+				// Column number is cheap, so we calculate it every round.
+				// We're interested at the *end* of the newline string, so
+				// we need to add strlen($nl) == 1 to $nl_pos before subtracting it
+				// from our "rcursor" position.
+				$nl_pos = strrpos($html, $nl, $rcursor - $length);
+				$current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
+
+				// recalculate lines
+				if (
+					$synchronize_interval &&  // synchronization is on
+					$cursor > 0 &&            // cursor is further than zero
+					$loops % $synchronize_interval === 0 // time to synchronize!
+				) {
+					$current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
+				}
+
+			}
+
+			$position_next_lt = strpos($html, '<', $cursor);
+			$position_next_gt = strpos($html, '>', $cursor);
+
+			// triggers on "<b>asdf</b>" but not "asdf <b></b>"
+			// special case to set up context
+			if ($position_next_lt === $cursor) {
+				$inside_tag = true;
+				$cursor++;
+			}
+
+			if (!$inside_tag && $position_next_lt !== false) {
+				// We are not inside tag and there still is another tag to parse
+				$token = new
+					HTMLPurifier_Token_Text(
+						$this->parseData(
+							substr(
+								$html, $cursor, $position_next_lt - $cursor
+							)
+						)
+					);
+				if ($maintain_line_numbers) {
+					$token->rawPosition($current_line, $current_col);
+					$current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
+				}
+				$array[] = $token;
+				$cursor  = $position_next_lt + 1;
+				$inside_tag = true;
+				continue;
+			} elseif (!$inside_tag) {
+				// We are not inside tag but there are no more tags
+				// If we're already at the end, break
+				if ($cursor === strlen($html)) break;
+				// Create Text of rest of string
+				$token = new
+					HTMLPurifier_Token_Text(
+						$this->parseData(
+							substr(
+								$html, $cursor
+							)
+						)
+					);
+				if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
+				$array[] = $token;
+				break;
+			} elseif ($inside_tag && $position_next_gt !== false) {
+				// We are in tag and it is well formed
+				// Grab the internals of the tag
+				$strlen_segment = $position_next_gt - $cursor;
+
+				if ($strlen_segment < 1) {
+					// there's nothing to process!
+					$token = new HTMLPurifier_Token_Text('<');
+					$cursor++;
+					continue;
+				}
+
+				$segment = substr($html, $cursor, $strlen_segment);
+
+				if ($segment === false) {
+					// somehow, we attempted to access beyond the end of
+					// the string, defense-in-depth, reported by Nate Abele
+					break;
+				}
+
+				// Check if it's a comment
+				if (
+					substr($segment, 0, 3) === '!--'
+				) {
+					// re-determine segment length, looking for -->
+					$position_comment_end = strpos($html, '-->', $cursor);
+					if ($position_comment_end === false) {
+						// uh oh, we have a comment that extends to
+						// infinity. Can't be helped: set comment
+						// end position to end of string
+						if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
+						$position_comment_end = strlen($html);
+						$end = true;
+					} else {
+						$end = false;
+					}
+					$strlen_segment = $position_comment_end - $cursor;
+					$segment = substr($html, $cursor, $strlen_segment);
+					$token = new
+						HTMLPurifier_Token_Comment(
+							substr(
+								$segment, 3, $strlen_segment - 3
+							)
+						);
+					if ($maintain_line_numbers) {
+						$token->rawPosition($current_line, $current_col);
+						$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
+					}
+					$array[] = $token;
+					$cursor = $end ? $position_comment_end : $position_comment_end + 3;
+					$inside_tag = false;
+					continue;
+				}
+
+				// Check if it's an end tag
+				$is_end_tag = (strpos($segment,'/') === 0);
+				if ($is_end_tag) {
+					$type = substr($segment, 1);
+					$token = new HTMLPurifier_Token_End($type);
+					if ($maintain_line_numbers) {
+						$token->rawPosition($current_line, $current_col);
+						$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+					}
+					$array[] = $token;
+					$inside_tag = false;
+					$cursor = $position_next_gt + 1;
+					continue;
+				}
+
+				// Check leading character is alphabetic, if not, we may
+				// have accidentally grabbed an emoticon. Translate into
+				// text and go our merry way
+				if (!ctype_alpha($segment[0])) {
+					// XML:  $segment[0] !== '_' && $segment[0] !== ':'
+					if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
+					$token = new HTMLPurifier_Token_Text('<');
+					if ($maintain_line_numbers) {
+						$token->rawPosition($current_line, $current_col);
+						$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+					}
+					$array[] = $token;
+					$inside_tag = false;
+					continue;
+				}
+
+				// Check if it is explicitly self closing, if so, remove
+				// trailing slash. Remember, we could have a tag like <br>, so
+				// any later token processing scripts must convert improperly
+				// classified EmptyTags from StartTags.
+				$is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
+				if ($is_self_closing) {
+					$strlen_segment--;
+					$segment = substr($segment, 0, $strlen_segment);
+				}
+
+				// Check if there are any attributes
+				$position_first_space = strcspn($segment, $this->_whitespace);
+
+				if ($position_first_space >= $strlen_segment) {
+					if ($is_self_closing) {
+						$token = new HTMLPurifier_Token_Empty($segment);
+					} else {
+						$token = new HTMLPurifier_Token_Start($segment);
+					}
+					if ($maintain_line_numbers) {
+						$token->rawPosition($current_line, $current_col);
+						$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+					}
+					$array[] = $token;
+					$inside_tag = false;
+					$cursor = $position_next_gt + 1;
+					continue;
+				}
+
+				// Grab out all the data
+				$type = substr($segment, 0, $position_first_space);
+				$attribute_string =
+					trim(
+						substr(
+							$segment, $position_first_space
+						)
+					);
+				if ($attribute_string) {
+					$attr = $this->parseAttributeString(
+									$attribute_string
+								  , $config, $context
+							  );
+				} else {
+					$attr = array();
+				}
+
+				if ($is_self_closing) {
+					$token = new HTMLPurifier_Token_Empty($type, $attr);
+				} else {
+					$token = new HTMLPurifier_Token_Start($type, $attr);
+				}
+				if ($maintain_line_numbers) {
+					$token->rawPosition($current_line, $current_col);
+					$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
+				}
+				$array[] = $token;
+				$cursor = $position_next_gt + 1;
+				$inside_tag = false;
+				continue;
+			} else {
+				// inside tag, but there's no ending > sign
+				if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
+				$token = new
+					HTMLPurifier_Token_Text(
+						'<' .
+						$this->parseData(
+							substr($html, $cursor)
+						)
+					);
+				if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
+				// no cursor scroll? Hmm...
+				$array[] = $token;
+				break;
+			}
+			break;
+		}
+
+		$context->destroy('CurrentLine');
+		$context->destroy('CurrentCol');
+		return $array;
+	}
+
+	/**
+	 * PHP 5.0.x compatible substr_count that implements offset and length
+	 */
+	protected function substrCount($haystack, $needle, $offset, $length) {
+		static $oldVersion;
+		if ($oldVersion === null) {
+			$oldVersion = version_compare(PHP_VERSION, '5.1', '<');
+		}
+		if ($oldVersion) {
+			$haystack = substr($haystack, $offset, $length);
+			return substr_count($haystack, $needle);
+		} else {
+			return substr_count($haystack, $needle, $offset, $length);
+		}
+	}
+
+	/**
+	 * Takes the inside of an HTML tag and makes an assoc array of attributes.
+	 *
+	 * @param $string Inside of tag excluding name.
+	 * @return array Assoc array of attributes.
+	 */
+	public function parseAttributeString($string, $config, $context) {
+		$string = (string) $string; // quick typecast
+
+		if ($string == '') return array(); // no attributes
+
+		$e = false;
+		if ($config->get('Core.CollectErrors')) {
+			$e =& $context->get('ErrorCollector');
+		}
+
+		// let's see if we can abort as quickly as possible
+		// one equal sign, no spaces => one attribute
+		$num_equal = substr_count($string, '=');
+		$has_space = strpos($string, ' ');
+		if ($num_equal === 0 && !$has_space) {
+			// bool attribute
+			return array($string => $string);
+		} elseif ($num_equal === 1 && !$has_space) {
+			// only one attribute
+			list($key, $quoted_value) = explode('=', $string);
+			$quoted_value = trim($quoted_value);
+			if (!$key) {
+				if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
+				return array();
+			}
+			if (!$quoted_value) return array($key => '');
+			$first_char = @$quoted_value[0];
+			$last_char  = @$quoted_value[strlen($quoted_value)-1];
+
+			$same_quote = ($first_char == $last_char);
+			$open_quote = ($first_char == '"' || $first_char == "'");
+
+			if ( $same_quote && $open_quote) {
+				// well behaved
+				$value = substr($quoted_value, 1, strlen($quoted_value) - 2);
+			} else {
+				// not well behaved
+				if ($open_quote) {
+					if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
+					$value = substr($quoted_value, 1);
+				} else {
+					$value = $quoted_value;
+				}
+			}
+			if ($value === false) $value = '';
+			return array($key => $this->parseData($value));
+		}
+
+		// setup loop environment
+		$array  = array(); // return assoc array of attributes
+		$cursor = 0; // current position in string (moves forward)
+		$size   = strlen($string); // size of the string (stays the same)
+
+		// if we have unquoted attributes, the parser expects a terminating
+		// space, so let's guarantee that there's always a terminating space.
+		$string .= ' ';
+
+		while(true) {
+
+			if ($cursor >= $size) {
+				break;
+			}
+
+			$cursor += ($value = strspn($string, $this->_whitespace, $cursor));
+			// grab the key
+
+			$key_begin = $cursor; //we're currently at the start of the key
+
+			// scroll past all characters that are the key (not whitespace or =)
+			$cursor += strcspn($string, $this->_whitespace . '=', $cursor);
+
+			$key_end = $cursor; // now at the end of the key
+
+			$key = substr($string, $key_begin, $key_end - $key_begin);
+
+			if (!$key) {
+				if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
+				$cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
+				continue; // empty key
+			}
+
+			// scroll past all whitespace
+			$cursor += strspn($string, $this->_whitespace, $cursor);
+
+			if ($cursor >= $size) {
+				$array[$key] = $key;
+				break;
+			}
+
+			// if the next character is an equal sign, we've got a regular
+			// pair, otherwise, it's a bool attribute
+			$first_char = @$string[$cursor];
+
+			if ($first_char == '=') {
+				// key="value"
+
+				$cursor++;
+				$cursor += strspn($string, $this->_whitespace, $cursor);
+
+				if ($cursor === false) {
+					$array[$key] = '';
+					break;
+				}
+
+				// we might be in front of a quote right now
+
+				$char = @$string[$cursor];
+
+				if ($char == '"' || $char == "'") {
+					// it's quoted, end bound is $char
+					$cursor++;
+					$value_begin = $cursor;
+					$cursor = strpos($string, $char, $cursor);
+					$value_end = $cursor;
+				} else {
+					// it's not quoted, end bound is whitespace
+					$value_begin = $cursor;
+					$cursor += strcspn($string, $this->_whitespace, $cursor);
+					$value_end = $cursor;
+				}
+
+				// we reached a premature end
+				if ($cursor === false) {
+					$cursor = $size;
+					$value_end = $cursor;
+				}
+
+				$value = substr($string, $value_begin, $value_end - $value_begin);
+				if ($value === false) $value = '';
+				$array[$key] = $this->parseData($value);
+				$cursor++;
+
+			} else {
+				// boolattr
+				if ($key !== '') {
+					$array[$key] = $key;
+				} else {
+					// purely theoretical
+					if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
+				}
+
+			}
+		}
+		return $array;
+	}
 
 }
 

Please login to merge, or discard this patch.

Braces +43 added lines, -14 removed lines patch added patch discarded remove patch

@@ -137,7 +137,9 @@  discard block
 block discarded – undo
             } elseif (!$inside_tag) {
                 // We are not inside tag but there are no more tags
                 // If we're already at the end, break
-                if ($cursor === strlen($html)) break;
+                if ($cursor === strlen($html)) {
+                	break;
+                }
                 // Create Text of rest of string
                 $token = new
                     HTMLPurifier_Token_Text(
@@ -147,7 +149,9 @@  discard block
 block discarded – undo
                             )
                         )
                     );
-                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
+                if ($maintain_line_numbers) {
+                	$token->rawPosition($current_line, $current_col);
+                }
                 $array[] = $token;
                 break;
             } elseif ($inside_tag && $position_next_gt !== false) {
@@ -180,7 +184,9 @@  discard block
 block discarded – undo
                         // uh oh, we have a comment that extends to
                         // infinity. Can't be helped: set comment
                         // end position to end of string
-                        if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
+                        if ($e) {
+                        	$e->send(E_WARNING, 'Lexer: Unclosed comment');
+                        }
                         $position_comment_end = strlen($html);
                         $end = true;
                     } else {
@@ -224,7 +230,9 @@  discard block
 block discarded – undo
                 // text and go our merry way
                 if (!ctype_alpha($segment[0])) {
                     // XML:  $segment[0] !== '_' && $segment[0] !== ':'
-                    if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
+                    if ($e) {
+                    	$e->send(E_NOTICE, 'Lexer: Unescaped lt');
+                    }
                     $token = new HTMLPurifier_Token_Text('<');
                     if ($maintain_line_numbers) {
                         $token->rawPosition($current_line, $current_col);
@@ -296,7 +304,9 @@  discard block
 block discarded – undo
                 continue;
             } else {
                 // inside tag, but there's no ending > sign
-                if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
+                if ($e) {
+                	$e->send(E_WARNING, 'Lexer: Missing gt');
+                }
                 $token = new
                     HTMLPurifier_Token_Text(
                         '<' .
@@ -304,7 +314,9 @@  discard block
 block discarded – undo
                             substr($html, $cursor)
                         )
                     );
-                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
+                if ($maintain_line_numbers) {
+                	$token->rawPosition($current_line, $current_col);
+                }
                 // no cursor scroll? Hmm...
                 $array[] = $token;
                 break;
@@ -342,7 +354,10 @@  discard block
 block discarded – undo
     public function parseAttributeString($string, $config, $context) {
         $string = (string) $string; // quick typecast
 
-        if ($string == '') return array(); // no attributes
+        if ($string == '') {
+        	return array();
+        }
+        // no attributes
 
         $e = false;
         if ($config->get('Core.CollectErrors')) {
@@ -361,10 +376,14 @@  discard block
 block discarded – undo
             list($key, $quoted_value) = explode('=', $string);
             $quoted_value = trim($quoted_value);
             if (!$key) {
-                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
+                if ($e) {
+                	$e->send(E_ERROR, 'Lexer: Missing attribute key');
+                }
                 return array();
             }
-            if (!$quoted_value) return array($key => '');
+            if (!$quoted_value) {
+            	return array($key => '');
+            }
             $first_char = @$quoted_value[0];
             $last_char  = @$quoted_value[strlen($quoted_value)-1];
 
@@ -377,13 +396,17 @@  discard block
 block discarded – undo
             } else {
                 // not well behaved
                 if ($open_quote) {
-                    if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
+                    if ($e) {
+                    	$e->send(E_ERROR, 'Lexer: Missing end quote');
+                    }
                     $value = substr($quoted_value, 1);
                 } else {
                     $value = $quoted_value;
                 }
             }
-            if ($value === false) $value = '';
+            if ($value === false) {
+            	$value = '';
+            }
             return array($key => $this->parseData($value));
         }
 
@@ -415,7 +438,9 @@  discard block
 block discarded – undo
             $key = substr($string, $key_begin, $key_end - $key_begin);
 
             if (!$key) {
-                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
+                if ($e) {
+                	$e->send(E_ERROR, 'Lexer: Missing attribute key');
+                }
                 $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
                 continue; // empty key
             }
@@ -467,7 +492,9 @@  discard block
 block discarded – undo
                 }
 
                 $value = substr($string, $value_begin, $value_end - $value_begin);
-                if ($value === false) $value = '';
+                if ($value === false) {
+                	$value = '';
+                }
                 $array[$key] = $this->parseData($value);
                 $cursor++;
 
@@ -477,7 +504,9 @@  discard block
 block discarded – undo
                     $array[$key] = $key;
                 } else {
                     // purely theoretical
-                    if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
+                    if ($e) {
+                    	$e->send(E_ERROR, 'Lexer: Missing attribute key');
+                    }
                 }
 
             }

Please login to merge, or discard this patch.

Spacing +14 added lines, -14 removed lines patch added patch discarded remove patch

@@ -25,7 +25,7 @@  discard block
 block discarded – undo
      * @param $matches, in form of array(opening tag, contents, closing tag)
      */
     protected function scriptCallback($matches) {
-        return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false) . $matches[3];
+        return $matches[1].htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false).$matches[3];
     }
 
     public function tokenizeHTML($html, $config, $context) {
@@ -63,7 +63,7 @@  discard block
 block discarded – undo
             $length = false;
         }
         $context->register('CurrentLine', $current_line);
-        $context->register('CurrentCol',  $current_col);
+        $context->register('CurrentCol', $current_col);
         $nl = "\n";
         // how often to manually recalculate. This will ALWAYS be right,
         // but it's pretty wasteful. Set to 0 to turn off
@@ -71,13 +71,13 @@  discard block
 block discarded – undo
 
         $e = false;
         if ($config->get('Core.CollectErrors')) {
-            $e =& $context->get('ErrorCollector');
+            $e = & $context->get('ErrorCollector');
         }
 
         // for testing synchronization
         $loops = 0;
 
-        while(++$loops) {
+        while (++$loops) {
 
             // $cursor is either at the start of a token, or inside of
             // a tag (i.e. there was a < immediately before it), as indicated
@@ -97,8 +97,8 @@  discard block
 block discarded – undo
 
                 // recalculate lines
                 if (
-                    $synchronize_interval &&  // synchronization is on
-                    $cursor > 0 &&            // cursor is further than zero
+                    $synchronize_interval && // synchronization is on
+                    $cursor > 0 && // cursor is further than zero
                     $loops % $synchronize_interval === 0 // time to synchronize!
                 ) {
                     $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
@@ -205,7 +205,7 @@  discard block
 block discarded – undo
                 }
 
                 // Check if it's an end tag
-                $is_end_tag = (strpos($segment,'/') === 0);
+                $is_end_tag = (strpos($segment, '/') === 0);
                 if ($is_end_tag) {
                     $type = substr($segment, 1);
                     $token = new HTMLPurifier_Token_End($type);
@@ -239,7 +239,7 @@  discard block
 block discarded – undo
                 // trailing slash. Remember, we could have a tag like <br>, so
                 // any later token processing scripts must convert improperly
                 // classified EmptyTags from StartTags.
-                $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
+                $is_self_closing = (strrpos($segment, '/') === $strlen_segment - 1);
                 if ($is_self_closing) {
                     $strlen_segment--;
                     $segment = substr($segment, 0, $strlen_segment);
@@ -299,7 +299,7 @@  discard block
 block discarded – undo
                 if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
                 $token = new
                     HTMLPurifier_Token_Text(
-                        '<' .
+                        '<'.
                         $this->parseData(
                             substr($html, $cursor)
                         )
@@ -346,7 +346,7 @@  discard block
 block discarded – undo
 
         $e = false;
         if ($config->get('Core.CollectErrors')) {
-            $e =& $context->get('ErrorCollector');
+            $e = & $context->get('ErrorCollector');
         }
 
         // let's see if we can abort as quickly as possible
@@ -366,12 +366,12 @@  discard block
 block discarded – undo
             }
             if (!$quoted_value) return array($key => '');
             $first_char = @$quoted_value[0];
-            $last_char  = @$quoted_value[strlen($quoted_value)-1];
+            $last_char  = @$quoted_value[strlen($quoted_value) - 1];
 
             $same_quote = ($first_char == $last_char);
             $open_quote = ($first_char == '"' || $first_char == "'");
 
-            if ( $same_quote && $open_quote) {
+            if ($same_quote && $open_quote) {
                 // well behaved
                 $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
             } else {
@@ -396,7 +396,7 @@  discard block
 block discarded – undo
         // space, so let's guarantee that there's always a terminating space.
         $string .= ' ';
 
-        while(true) {
+        while (true) {
 
             if ($cursor >= $size) {
                 break;
@@ -408,7 +408,7 @@  discard block
 block discarded – undo
             $key_begin = $cursor; //we're currently at the start of the key
 
             // scroll past all characters that are the key (not whitespace or =)
-            $cursor += strcspn($string, $this->_whitespace . '=', $cursor);
+            $cursor += strcspn($string, $this->_whitespace.'=', $cursor);
 
             $key_end = $cursor; // now at the end of the key
 

Please login to merge, or discard this patch.

GitHub Access Token became invalid

Push — master ( b130b6...8a2f54 )

Status

Category

Doc Comments +5 added lines patch added patch discarded remove patch

Indentation +526 added lines, -526 removed lines patch added patch discarded remove patch

Braces +33 added lines, -11 removed lines patch added patch discarded remove patch

Spacing +21 added lines, -21 removed lines patch added patch discarded remove patch

Doc Comments +3 added lines patch added patch discarded remove patch

Indentation +196 added lines, -196 removed lines patch added patch discarded remove patch

Braces +15 added lines, -5 removed lines patch added patch discarded remove patch

Spacing +7 added lines, -7 removed lines patch added patch discarded remove patch

Doc Comments +4 added lines, -4 removed lines patch added patch discarded remove patch

Indentation +238 added lines, -238 removed lines patch added patch discarded remove patch

Braces +21 added lines, -7 removed lines patch added patch discarded remove patch

Spacing +9 added lines, -9 removed lines patch added patch discarded remove patch

Doc Comments +1 added lines, -2 removed lines patch added patch discarded remove patch

Indentation +392 added lines, -392 removed lines patch added patch discarded remove patch

Braces +41 added lines, -15 removed lines patch added patch discarded remove patch

Spacing +4 added lines, -4 removed lines patch added patch discarded remove patch

Doc Comments +2 added lines, -1 removed lines patch added patch discarded remove patch

Indentation +406 added lines, -406 removed lines patch added patch discarded remove patch

Braces +24 added lines, -8 removed lines patch added patch discarded remove patch

Spacing +6 added lines, -6 removed lines patch added patch discarded remove patch

Doc Comments +1 added lines, -1 removed lines patch added patch discarded remove patch

Indentation +35 added lines, -35 removed lines patch added patch discarded remove patch

Braces +3 added lines, -1 removed lines patch added patch discarded remove patch

Doc Comments +3 added lines, -2 removed lines patch added patch discarded remove patch

Indentation +216 added lines, -216 removed lines patch added patch discarded remove patch

Spacing +4 added lines, -4 removed lines patch added patch discarded remove patch

Braces +49 added lines, -18 removed lines patch added patch discarded remove patch

Doc Comments +1 added lines, -1 removed lines patch added patch discarded remove patch

Indentation +150 added lines, -150 removed lines patch added patch discarded remove patch

Braces +30 added lines, -10 removed lines patch added patch discarded remove patch

Spacing +1 added lines, -1 removed lines patch added patch discarded remove patch

Doc Comments +4 added lines, -1 removed lines patch added patch discarded remove patch

Indentation +471 added lines, -471 removed lines patch added patch discarded remove patch

Braces +43 added lines, -14 removed lines patch added patch discarded remove patch

Spacing +14 added lines, -14 removed lines patch added patch discarded remove patch

		@@ -31,6 +31,9 @@ discard block
		block discarded – undo
31	31
32	32	/**
33	33	* iconv wrapper which mutes errors and works around bugs.
	34	+ * @param string $in
	35	+ * @param string $out
	36	+ * @param string $text
34	37	*/
35	38	public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
36	39	$code = self::testIconvTruncateBug();
		@@ -332,6 +335,7 @@ discard block
		block discarded – undo
332	335
333	336	/**
334	337	* Converts a string to UTF-8 based on configuration.
	338	+ * @param HTMLPurifier_Context $context
335	339	*/
336	340	public static function convertToUTF8($str, $config, $context) {
337	341	$encoding = $config->get('Core.Encoding');
		@@ -362,6 +366,7 @@ discard block
		block discarded – undo
362	366	* Converts a string from UTF-8 based on configuration.
363	367	* @note Currently, this is a lossy conversion, with unexpressable
364	368	* characters being omitted.
	369	+ * @param HTMLPurifier_Context $context
365	370	*/
366	371	public static function convertFromUTF8($str, $config, $context) {
367	372	$encoding = $config->get('Core.Encoding');

		@@ -314,9 +314,15 @@ discard block
		block discarded – undo
314	314	}
315	315	// set up the actual character
316	316	$ret = '';
317		- if($w) $ret .= chr($w);
318		- if($z) $ret .= chr($z);
319		- if($y) $ret .= chr($y);
	317	+ if($w) {
	318	+ $ret .= chr($w);
	319	+ }
	320	+ if($z) {
	321	+ $ret .= chr($z);
	322	+ }
	323	+ if($y) {
	324	+ $ret .= chr($y);
	325	+ }
320	326	$ret .= chr($x);
321	327
322	328	return $ret;
		@@ -335,9 +341,13 @@ discard block
		block discarded – undo
335	341	*/
336	342	public static function convertToUTF8($str, $config, $context) {
337	343	$encoding = $config->get('Core.Encoding');
338		- if ($encoding === 'utf-8') return $str;
	344	+ if ($encoding === 'utf-8') {
	345	+ return $str;
	346	+ }
339	347	static $iconv = null;
340		- if ($iconv === null) $iconv = self::iconvAvailable();
	348	+ if ($iconv === null) {
	349	+ $iconv = self::iconvAvailable();
	350	+ }
341	351	if ($iconv && !$config->get('Test.ForceNoIconv')) {
342	352	// unaffected by bugs, since UTF-8 support all characters
343	353	$str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
		@@ -368,15 +378,21 @@ discard block
		block discarded – undo
368	378	if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
369	379	$str = self::convertToASCIIDumbLossless($str);
370	380	}
371		- if ($encoding === 'utf-8') return $str;
	381	+ if ($encoding === 'utf-8') {
	382	+ return $str;
	383	+ }
372	384	static $iconv = null;
373		- if ($iconv === null) $iconv = self::iconvAvailable();
	385	+ if ($iconv === null) {
	386	+ $iconv = self::iconvAvailable();
	387	+ }
374	388	if ($iconv && !$config->get('Test.ForceNoIconv')) {
375	389	// Undo our previous fix in convertToUTF8, otherwise iconv will barf
376	390	$ascii_fix = self::testEncodingSupportsASCII($encoding);
377	391	if (!$escape && !empty($ascii_fix)) {
378	392	$clear_fix = array();
379		- foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
	393	+ foreach ($ascii_fix as $utf8 => $native) {
	394	+ $clear_fix[$utf8] = '';
	395	+ }
380	396	$str = strtr($str, $clear_fix);
381	397	}
382	398	$str = strtr($str, array_flip($ascii_fix));
		@@ -503,7 +519,9 @@ discard block
		block discarded – undo
503	519	// If ICONV_UNUSABLE, this call is irrelevant
504	520	static $encodings = array();
505	521	if (!$bypass) {
506		- if (isset($encodings[$encoding])) return $encodings[$encoding];
	522	+ if (isset($encodings[$encoding])) {
	523	+ return $encodings[$encoding];
	524	+ }
507	525	$lenc = strtolower($encoding);
508	526	switch ($lenc) {
509	527	case 'shift_jis':
		@@ -511,10 +529,14 @@ discard block
		block discarded – undo
511	529	case 'johab':
512	530	return array("\xE2\x82\xA9" => '\\');
513	531	}
514		- if (strpos($lenc, 'iso-8859-') === 0) return array();
	532	+ if (strpos($lenc, 'iso-8859-') === 0) {
	533	+ return array();
	534	+ }
515	535	}
516	536	$ret = array();
517		- if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
	537	+ if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) {
	538	+ return false;
	539	+ }
518	540	for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
519	541	$c = chr($i); // UTF-8 char
520	542	$r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion

		@@ -132,7 +132,7 @@ discard block
		block discarded – undo
132	132	$char = '';
133	133
134	134	$len = strlen($str);
135		- for($i = 0; $i < $len; $i++) {
	135	+ for ($i = 0; $i < $len; $i++) {
136	136	$in = ord($str{$i});
137	137	$char .= $str[$i]; // append byte to char
138	138	if (0 == $mState) {
		@@ -252,7 +252,7 @@ discard block
		block discarded – undo
252	252	$mState = 0;
253	253	$mUcs4 = 0;
254	254	$mBytes = 1;
255		- $char ='';
	255	+ $char = '';
256	256	}
257	257	}
258	258	}
		@@ -286,8 +286,8 @@ discard block
		block discarded – undo
286	286	// +----------+----------+----------+----------+
287	287
288	288	public static function unichr($code) {
289		- if($code > 1114111 or $code < 0 or
290		- ($code >= 55296 and $code <= 57343) ) {
	289	+ if ($code > 1114111 or $code < 0 or
	290	+ ($code >= 55296 and $code <= 57343)) {
291	291	// bits are set outside the "valid" range as defined
292	292	// by UNICODE 4.1.0
293	293	return '';
		@@ -304,19 +304,19 @@ discard block
		block discarded – undo
304	304	$y = (($code & 2047) >> 6) | 192;
305	305	} else {
306	306	$y = (($code & 4032) >> 6) | 128;
307		- if($code < 65536) {
	307	+ if ($code < 65536) {
308	308	$z = (($code >> 12) & 15) | 224;
309	309	} else {
310	310	$z = (($code >> 12) & 63) | 128;
311		- $w = (($code >> 18) & 7) | 240;
	311	+ $w = (($code >> 18) & 7) | 240;
312	312	}
313	313	}
314	314	}
315	315	// set up the actual character
316	316	$ret = '';
317		- if($w) $ret .= chr($w);
318		- if($z) $ret .= chr($z);
319		- if($y) $ret .= chr($y);
	317	+ if ($w) $ret .= chr($w);
	318	+ if ($z) $ret .= chr($z);
	319	+ if ($y) $ret .= chr($y);
320	320	$ret .= chr($x);
321	321
322	322	return $ret;
		@@ -343,7 +343,7 @@ discard block
		block discarded – undo
343	343	$str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
344	344	if ($str === false) {
345	345	// $encoding is not a valid encoding
346		- trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
	346	+ trigger_error('Invalid encoding '.$encoding, E_USER_ERROR);
347	347	return '';
348	348	}
349	349	// If the string is bjorked by Shift_JIS or a similar encoding
		@@ -381,7 +381,7 @@ discard block
		block discarded – undo
381	381	}
382	382	$str = strtr($str, array_flip($ascii_fix));
383	383	// Normal stuff
384		- $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
	384	+ $str = self::iconv('utf-8', $encoding.'//IGNORE', $str);
385	385	return $str;
386	386	} elseif ($encoding === 'iso-8859-1') {
387	387	$str = utf8_decode($str);
		@@ -415,22 +415,22 @@ discard block
		block discarded – undo
415	415	$result = '';
416	416	$working = 0;
417	417	$len = strlen($str);
418		- for( $i = 0; $i < $len; $i++ ) {
419		- $bytevalue = ord( $str[$i] );
420		- if( $bytevalue <= 0x7F ) { //0xxx xxxx
421		- $result .= chr( $bytevalue );
	418	+ for ($i = 0; $i < $len; $i++) {
	419	+ $bytevalue = ord($str[$i]);
	420	+ if ($bytevalue <= 0x7F) { //0xxx xxxx
	421	+ $result .= chr($bytevalue);
422	422	$bytesleft = 0;
423		- } elseif( $bytevalue <= 0xBF ) { //10xx xxxx
	423	+ } elseif ($bytevalue <= 0xBF) { //10xx xxxx
424	424	$working = $working << 6;
425	425	$working += ($bytevalue & 0x3F);
426	426	$bytesleft--;
427		- if( $bytesleft <= 0 ) {
428		- $result .= "&#" . $working . ";";
	427	+ if ($bytesleft <= 0) {
	428	+ $result .= "&#".$working.";";
429	429	}
430		- } elseif( $bytevalue <= 0xDF ) { //110x xxxx
	430	+ } elseif ($bytevalue <= 0xDF) { //110x xxxx
431	431	$working = $bytevalue & 0x1F;
432	432	$bytesleft = 1;
433		- } elseif( $bytevalue <= 0xEF ) { //1110 xxxx
	433	+ } elseif ($bytevalue <= 0xEF) { //1110 xxxx
434	434	$working = $bytevalue & 0x0F;
435	435	$bytesleft = 2;
436	436	} else { //1111 0xxx
		@@ -470,7 +470,7 @@ discard block
		block discarded – undo
470	470	static $code = null;
471	471	if ($code === null) {
472	472	// better not use iconv, otherwise infinite loop!
473		- $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
	473	+ $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1".str_repeat('a', 9000));
474	474	if ($r === false) {
475	475	$code = self::ICONV_UNUSABLE;
476	476	} elseif (($c = strlen($r)) < 9000) {

		@@ -25,6 +25,9 @@
		block discarded – undo
25	25
26	26	protected $lines = array();
27	27
	28	+ /**
	29	+ * @param HTMLPurifier_Context $context
	30	+ */
28	31	public function __construct($context) {
29	32	$this->locale =& $context->get('Locale');
30	33	$this->context = $context;

		@@ -7,202 +7,202 @@
		block discarded – undo
7	7	class HTMLPurifier_ErrorCollector
8	8	{
9	9
10		- /**
11		- * Identifiers for the returned error array. These are purposely numeric
12		- * so list() can be used.
13		- */
14		- const LINENO = 0;
15		- const SEVERITY = 1;
16		- const MESSAGE = 2;
17		- const CHILDREN = 3;
18		-
19		- protected $errors;
20		- protected $_current;
21		- protected $_stacks = array(array());
22		- protected $locale;
23		- protected $generator;
24		- protected $context;
25		-
26		- protected $lines = array();
27		-
28		- public function __construct($context) {
29		- $this->locale =& $context->get('Locale');
30		- $this->context = $context;
31		- $this->_current =& $this->_stacks[0];
32		- $this->errors =& $this->_stacks[0];
33		- }
34		-
35		- /**
36		- * Sends an error message to the collector for later use
37		- * @param $severity int Error severity, PHP error style (don't use E_USER_)
38		- * @param $msg string Error message text
39		- * @param $subst1 string First substitution for $msg
40		- * @param $subst2 string ...
41		- */
42		- public function send($severity, $msg) {
43		-
44		- $args = array();
45		- if (func_num_args() > 2) {
46		- $args = func_get_args();
47		- array_shift($args);
48		- unset($args[0]);
49		- }
50		-
51		- $token = $this->context->get('CurrentToken', true);
52		- $line = $token ? $token->line : $this->context->get('CurrentLine', true);
53		- $col = $token ? $token->col : $this->context->get('CurrentCol', true);
54		- $attr = $this->context->get('CurrentAttr', true);
55		-
56		- // perform special substitutions, also add custom parameters
57		- $subst = array();
58		- if (!is_null($token)) {
59		- $args['CurrentToken'] = $token;
60		- }
61		- if (!is_null($attr)) {
62		- $subst['$CurrentAttr.Name'] = $attr;
63		- if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
64		- }
65		-
66		- if (empty($args)) {
67		- $msg = $this->locale->getMessage($msg);
68		- } else {
69		- $msg = $this->locale->formatMessage($msg, $args);
70		- }
71		-
72		- if (!empty($subst)) $msg = strtr($msg, $subst);
73		-
74		- // (numerically indexed)
75		- $error = array(
76		- self::LINENO => $line,
77		- self::SEVERITY => $severity,
78		- self::MESSAGE => $msg,
79		- self::CHILDREN => array()
80		- );
81		- $this->_current[] = $error;
82		-
83		-
84		- // NEW CODE BELOW ...
85		-
86		- $struct = null;
87		- // Top-level errors are either:
88		- // TOKEN type, if $value is set appropriately, or
89		- // "syntax" type, if $value is null
90		- $new_struct = new HTMLPurifier_ErrorStruct();
91		- $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
92		- if ($token) $new_struct->value = clone $token;
93		- if (is_int($line) && is_int($col)) {
94		- if (isset($this->lines[$line][$col])) {
95		- $struct = $this->lines[$line][$col];
96		- } else {
97		- $struct = $this->lines[$line][$col] = $new_struct;
98		- }
99		- // These ksorts may present a performance problem
100		- ksort($this->lines[$line], SORT_NUMERIC);
101		- } else {
102		- if (isset($this->lines[-1])) {
103		- $struct = $this->lines[-1];
104		- } else {
105		- $struct = $this->lines[-1] = $new_struct;
106		- }
107		- }
108		- ksort($this->lines, SORT_NUMERIC);
109		-
110		- // Now, check if we need to operate on a lower structure
111		- if (!empty($attr)) {
112		- $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
113		- if (!$struct->value) {
114		- $struct->value = array($attr, 'PUT VALUE HERE');
115		- }
116		- }
117		- if (!empty($cssprop)) {
118		- $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
119		- if (!$struct->value) {
120		- // if we tokenize CSS this might be a little more difficult to do
121		- $struct->value = array($cssprop, 'PUT VALUE HERE');
122		- }
123		- }
124		-
125		- // Ok, structs are all setup, now time to register the error
126		- $struct->addError($severity, $msg);
127		- }
128		-
129		- /**
130		- * Retrieves raw error data for custom formatter to use
131		- * @param List of arrays in format of array(line of error,
132		- * error severity, error message,
133		- * recursive sub-errors array)
134		- */
135		- public function getRaw() {
136		- return $this->errors;
137		- }
138		-
139		- /**
140		- * Default HTML formatting implementation for error messages
141		- * @param $config Configuration array, vital for HTML output nature
142		- * @param $errors Errors array to display; used for recursion.
143		- */
144		- public function getHTMLFormatted($config, $errors = null) {
145		- $ret = array();
146		-
147		- $this->generator = new HTMLPurifier_Generator($config, $this->context);
148		- if ($errors === null) $errors = $this->errors;
149		-
150		- // 'At line' message needs to be removed
151		-
152		- // generation code for new structure goes here. It needs to be recursive.
153		- foreach ($this->lines as $line => $col_array) {
154		- if ($line == -1) continue;
155		- foreach ($col_array as $col => $struct) {
156		- $this->_renderStruct($ret, $struct, $line, $col);
157		- }
158		- }
159		- if (isset($this->lines[-1])) {
160		- $this->_renderStruct($ret, $this->lines[-1]);
161		- }
162		-
163		- if (empty($errors)) {
164		- return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
165		- } else {
166		- return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
167		- }
168		-
169		- }
170		-
171		- private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
172		- $stack = array($struct);
173		- $context_stack = array(array());
174		- while ($current = array_pop($stack)) {
175		- $context = array_pop($context_stack);
176		- foreach ($current->errors as $error) {
177		- list($severity, $msg) = $error;
178		- $string = '';
179		- $string .= '<div>';
180		- // W3C uses an icon to indicate the severity of the error.
181		- $error = $this->locale->getErrorName($severity);
182		- $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
183		- if (!is_null($line) && !is_null($col)) {
184		- $string .= "<em class=\"location\">Line $line, Column $col: </em> ";
185		- } else {
186		- $string .= '<em class="location">End of Document: </em> ';
187		- }
188		- $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
189		- $string .= '</div>';
190		- // Here, have a marker for the character on the column appropriate.
191		- // Be sure to clip extremely long lines.
192		- //$string .= '<pre>';
193		- //$string .= '';
194		- //$string .= '</pre>';
195		- $ret[] = $string;
196		- }
197		- foreach ($current->children as $type => $array) {
198		- $context[] = $current;
199		- $stack = array_merge($stack, array_reverse($array, true));
200		- for ($i = count($array); $i > 0; $i--) {
201		- $context_stack[] = $context;
202		- }
203		- }
204		- }
205		- }
	10	+ /**
	11	+ * Identifiers for the returned error array. These are purposely numeric
	12	+ * so list() can be used.
	13	+ */
	14	+ const LINENO = 0;
	15	+ const SEVERITY = 1;
	16	+ const MESSAGE = 2;
	17	+ const CHILDREN = 3;
	18	+
	19	+ protected $errors;
	20	+ protected $_current;
	21	+ protected $_stacks = array(array());
	22	+ protected $locale;
	23	+ protected $generator;
	24	+ protected $context;
	25	+
	26	+ protected $lines = array();
	27	+
	28	+ public function __construct($context) {
	29	+ $this->locale =& $context->get('Locale');
	30	+ $this->context = $context;
	31	+ $this->_current =& $this->_stacks[0];
	32	+ $this->errors =& $this->_stacks[0];
	33	+ }
	34	+
	35	+ /**
	36	+ * Sends an error message to the collector for later use
	37	+ * @param $severity int Error severity, PHP error style (don't use E_USER_)
	38	+ * @param $msg string Error message text
	39	+ * @param $subst1 string First substitution for $msg
	40	+ * @param $subst2 string ...
	41	+ */
	42	+ public function send($severity, $msg) {
	43	+
	44	+ $args = array();
	45	+ if (func_num_args() > 2) {
	46	+ $args = func_get_args();
	47	+ array_shift($args);
	48	+ unset($args[0]);
	49	+ }
	50	+
	51	+ $token = $this->context->get('CurrentToken', true);
	52	+ $line = $token ? $token->line : $this->context->get('CurrentLine', true);
	53	+ $col = $token ? $token->col : $this->context->get('CurrentCol', true);
	54	+ $attr = $this->context->get('CurrentAttr', true);
	55	+
	56	+ // perform special substitutions, also add custom parameters
	57	+ $subst = array();
	58	+ if (!is_null($token)) {
	59	+ $args['CurrentToken'] = $token;
	60	+ }
	61	+ if (!is_null($attr)) {
	62	+ $subst['$CurrentAttr.Name'] = $attr;
	63	+ if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
	64	+ }
	65	+
	66	+ if (empty($args)) {
	67	+ $msg = $this->locale->getMessage($msg);
	68	+ } else {
	69	+ $msg = $this->locale->formatMessage($msg, $args);
	70	+ }
	71	+
	72	+ if (!empty($subst)) $msg = strtr($msg, $subst);
	73	+
	74	+ // (numerically indexed)
	75	+ $error = array(
	76	+ self::LINENO => $line,
	77	+ self::SEVERITY => $severity,
	78	+ self::MESSAGE => $msg,
	79	+ self::CHILDREN => array()
	80	+ );
	81	+ $this->_current[] = $error;
	82	+
	83	+
	84	+ // NEW CODE BELOW ...
	85	+
	86	+ $struct = null;
	87	+ // Top-level errors are either:
	88	+ // TOKEN type, if $value is set appropriately, or
	89	+ // "syntax" type, if $value is null
	90	+ $new_struct = new HTMLPurifier_ErrorStruct();
	91	+ $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
	92	+ if ($token) $new_struct->value = clone $token;
	93	+ if (is_int($line) && is_int($col)) {
	94	+ if (isset($this->lines[$line][$col])) {
	95	+ $struct = $this->lines[$line][$col];
	96	+ } else {
	97	+ $struct = $this->lines[$line][$col] = $new_struct;
	98	+ }
	99	+ // These ksorts may present a performance problem
	100	+ ksort($this->lines[$line], SORT_NUMERIC);
	101	+ } else {
	102	+ if (isset($this->lines[-1])) {
	103	+ $struct = $this->lines[-1];
	104	+ } else {
	105	+ $struct = $this->lines[-1] = $new_struct;
	106	+ }
	107	+ }
	108	+ ksort($this->lines, SORT_NUMERIC);
	109	+
	110	+ // Now, check if we need to operate on a lower structure
	111	+ if (!empty($attr)) {
	112	+ $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
	113	+ if (!$struct->value) {
	114	+ $struct->value = array($attr, 'PUT VALUE HERE');
	115	+ }
	116	+ }
	117	+ if (!empty($cssprop)) {
	118	+ $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
	119	+ if (!$struct->value) {
	120	+ // if we tokenize CSS this might be a little more difficult to do
	121	+ $struct->value = array($cssprop, 'PUT VALUE HERE');
	122	+ }
	123	+ }
	124	+
	125	+ // Ok, structs are all setup, now time to register the error
	126	+ $struct->addError($severity, $msg);
	127	+ }
	128	+
	129	+ /**
	130	+ * Retrieves raw error data for custom formatter to use
	131	+ * @param List of arrays in format of array(line of error,
	132	+ * error severity, error message,
	133	+ * recursive sub-errors array)
	134	+ */
	135	+ public function getRaw() {
	136	+ return $this->errors;
	137	+ }
	138	+
	139	+ /**
	140	+ * Default HTML formatting implementation for error messages
	141	+ * @param $config Configuration array, vital for HTML output nature
	142	+ * @param $errors Errors array to display; used for recursion.
	143	+ */
	144	+ public function getHTMLFormatted($config, $errors = null) {
	145	+ $ret = array();
	146	+
	147	+ $this->generator = new HTMLPurifier_Generator($config, $this->context);
	148	+ if ($errors === null) $errors = $this->errors;
	149	+
	150	+ // 'At line' message needs to be removed
	151	+
	152	+ // generation code for new structure goes here. It needs to be recursive.
	153	+ foreach ($this->lines as $line => $col_array) {
	154	+ if ($line == -1) continue;
	155	+ foreach ($col_array as $col => $struct) {
	156	+ $this->_renderStruct($ret, $struct, $line, $col);
	157	+ }
	158	+ }
	159	+ if (isset($this->lines[-1])) {
	160	+ $this->_renderStruct($ret, $this->lines[-1]);
	161	+ }
	162	+
	163	+ if (empty($errors)) {
	164	+ return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
	165	+ } else {
	166	+ return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
	167	+ }
	168	+
	169	+ }
	170	+
	171	+ private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
	172	+ $stack = array($struct);
	173	+ $context_stack = array(array());
	174	+ while ($current = array_pop($stack)) {
	175	+ $context = array_pop($context_stack);
	176	+ foreach ($current->errors as $error) {
	177	+ list($severity, $msg) = $error;
	178	+ $string = '';
	179	+ $string .= '<div>';
	180	+ // W3C uses an icon to indicate the severity of the error.
	181	+ $error = $this->locale->getErrorName($severity);
	182	+ $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
	183	+ if (!is_null($line) && !is_null($col)) {
	184	+ $string .= "<em class=\"location\">Line $line, Column $col: </em> ";
	185	+ } else {
	186	+ $string .= '<em class="location">End of Document: </em> ';
	187	+ }
	188	+ $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
	189	+ $string .= '</div>';
	190	+ // Here, have a marker for the character on the column appropriate.
	191	+ // Be sure to clip extremely long lines.
	192	+ //$string .= '<pre>';
	193	+ //$string .= '';
	194	+ //$string .= '</pre>';
	195	+ $ret[] = $string;
	196	+ }
	197	+ foreach ($current->children as $type => $array) {
	198	+ $context[] = $current;
	199	+ $stack = array_merge($stack, array_reverse($array, true));
	200	+ for ($i = count($array); $i > 0; $i--) {
	201	+ $context_stack[] = $context;
	202	+ }
	203	+ }
	204	+ }
	205	+ }
206	206
207	207	}
208	208

		@@ -60,7 +60,9 @@ discard block
		block discarded – undo
60	60	}
61	61	if (!is_null($attr)) {
62	62	$subst['$CurrentAttr.Name'] = $attr;
63		- if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
	63	+ if (isset($token->attr[$attr])) {
	64	+ $subst['$CurrentAttr.Value'] = $token->attr[$attr];
	65	+ }
64	66	}
65	67
66	68	if (empty($args)) {
		@@ -69,7 +71,9 @@ discard block
		block discarded – undo
69	71	$msg = $this->locale->formatMessage($msg, $args);
70	72	}
71	73
72		- if (!empty($subst)) $msg = strtr($msg, $subst);
	74	+ if (!empty($subst)) {
	75	+ $msg = strtr($msg, $subst);
	76	+ }
73	77
74	78	// (numerically indexed)
75	79	$error = array(
		@@ -89,7 +93,9 @@ discard block
		block discarded – undo
89	93	// "syntax" type, if $value is null
90	94	$new_struct = new HTMLPurifier_ErrorStruct();
91	95	$new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
92		- if ($token) $new_struct->value = clone $token;
	96	+ if ($token) {
	97	+ $new_struct->value = clone $token;
	98	+ }
93	99	if (is_int($line) && is_int($col)) {
94	100	if (isset($this->lines[$line][$col])) {
95	101	$struct = $this->lines[$line][$col];
		@@ -145,13 +151,17 @@ discard block
		block discarded – undo
145	151	$ret = array();
146	152
147	153	$this->generator = new HTMLPurifier_Generator($config, $this->context);
148		- if ($errors === null) $errors = $this->errors;
	154	+ if ($errors === null) {
	155	+ $errors = $this->errors;
	156	+ }
149	157
150	158	// 'At line' message needs to be removed
151	159
152	160	// generation code for new structure goes here. It needs to be recursive.
153	161	foreach ($this->lines as $line => $col_array) {
154		- if ($line == -1) continue;
	162	+ if ($line == -1) {
	163	+ continue;
	164	+ }
155	165	foreach ($col_array as $col => $struct) {
156	166	$this->_renderStruct($ret, $struct, $line, $col);
157	167	}

		@@ -26,10 +26,10 @@ discard block
		block discarded – undo
26	26	protected $lines = array();
27	27
28	28	public function __construct($context) {
29		- $this->locale =& $context->get('Locale');
	29	+ $this->locale = & $context->get('Locale');
30	30	$this->context = $context;
31		- $this->_current =& $this->_stacks[0];
32		- $this->errors =& $this->_stacks[0];
	31	+ $this->_current = & $this->_stacks[0];
	32	+ $this->errors = & $this->_stacks[0];
33	33	}
34	34
35	35	/**
		@@ -50,7 +50,7 @@ discard block
		block discarded – undo
50	50
51	51	$token = $this->context->get('CurrentToken', true);
52	52	$line = $token ? $token->line : $this->context->get('CurrentLine', true);
53		- $col = $token ? $token->col : $this->context->get('CurrentCol', true);
	53	+ $col = $token ? $token->col : $this->context->get('CurrentCol', true);
54	54	$attr = $this->context->get('CurrentAttr', true);
55	55
56	56	// perform special substitutions, also add custom parameters
		@@ -161,9 +161,9 @@ discard block
		block discarded – undo
161	161	}
162	162
163	163	if (empty($errors)) {
164		- return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
	164	+ return '<p>'.$this->locale->getMessage('ErrorCollector: No errors').'</p>';
165	165	} else {
166		- return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
	166	+ return '<ul><li>'.implode('</li><li>', $ret).'</li></ul>';
167	167	}
168	168
169	169	}
		@@ -185,7 +185,7 @@ discard block
		block discarded – undo
185	185	} else {
186	186	$string .= '<em class="location">End of Document: </em> ';
187	187	}
188		- $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
	188	+ $string .= '<strong class="description">'.$this->generator->escape($msg).'</strong> ';
189	189	$string .= '</div>';
190	190	// Here, have a marker for the character on the column appropriate.
191	191	// Be sure to clip extremely long lines.

		@@ -70,7 +70,7 @@ discard block
		block discarded – undo
70	70	* Generates HTML from an array of tokens.
71	71	* @param $tokens Array of HTMLPurifier_Token
72	72	* @param $config HTMLPurifier_Config object
73		- * @return Generated HTML
	73	+ * @return string HTML
74	74	*/
75	75	public function generateFromTokens($tokens) {
76	76	if (!$tokens) return '';
		@@ -115,7 +115,7 @@ discard block
		block discarded – undo
115	115	/**
116	116	* Generates HTML from a single token.
117	117	* @param $token HTMLPurifier_Token object.
118		- * @return Generated HTML
	118	+ * @return string HTML
119	119	*/
120	120	public function generateFromToken($token) {
121	121	if (!$token instanceof HTMLPurifier_Token) {
		@@ -181,7 +181,7 @@ discard block
		block discarded – undo
181	181	* @param $assoc_array_of_attributes Attribute array
182	182	* @param $element Name of element attributes are for, used to check
183	183	* attribute minimization.
184		- * @return Generate HTML fragment for insertion.
	184	+ * @return string HTML fragment for insertion.
185	185	*/
186	186	public function generateAttributes($assoc_array_of_attributes, $element = false) {
187	187	$html = '';
		@@ -238,7 +238,7 @@ discard block
		block discarded – undo
238	238	* for properly generating HTML here w/o using tokens, it stays
239	239	* public.
240	240	* @param $string String data to escape for HTML.
241		- * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
	241	+ * @param integer $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
242	242	* permissible for non-attribute output.
243	243	* @return String escaped data.
244	244	*/

		@@ -10,244 +10,244 @@
		block discarded – undo
10	10	class HTMLPurifier_Generator
11	11	{
12	12
13		- /**
14		- * Whether or not generator should produce XML output
15		- */
16		- private $_xhtml = true;
17		-
18		- /**
19		- * :HACK: Whether or not generator should comment the insides of <script> tags
20		- */
21		- private $_scriptFix = false;
22		-
23		- /**
24		- * Cache of HTMLDefinition during HTML output to determine whether or
25		- * not attributes should be minimized.
26		- */
27		- private $_def;
28		-
29		- /**
30		- * Cache of %Output.SortAttr
31		- */
32		- private $_sortAttr;
33		-
34		- /**
35		- * Cache of %Output.FlashCompat
36		- */
37		- private $_flashCompat;
38		-
39		- /**
40		- * Cache of %Output.FixInnerHTML
41		- */
42		- private $_innerHTMLFix;
43		-
44		- /**
45		- * Stack for keeping track of object information when outputting IE
46		- * compatibility code.
47		- */
48		- private $_flashStack = array();
49		-
50		- /**
51		- * Configuration for the generator
52		- */
53		- protected $config;
54		-
55		- /**
56		- * @param $config Instance of HTMLPurifier_Config
57		- * @param $context Instance of HTMLPurifier_Context
58		- */
59		- public function __construct($config, $context) {
60		- $this->config = $config;
61		- $this->_scriptFix = $config->get('Output.CommentScriptContents');
62		- $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
63		- $this->_sortAttr = $config->get('Output.SortAttr');
64		- $this->_flashCompat = $config->get('Output.FlashCompat');
65		- $this->_def = $config->getHTMLDefinition();
66		- $this->_xhtml = $this->_def->doctype->xml;
67		- }
68		-
69		- /**
70		- * Generates HTML from an array of tokens.
71		- * @param $tokens Array of HTMLPurifier_Token
72		- * @param $config HTMLPurifier_Config object
73		- * @return Generated HTML
74		- */
75		- public function generateFromTokens($tokens) {
76		- if (!$tokens) return '';
77		-
78		- // Basic algorithm
79		- $html = '';
80		- for ($i = 0, $size = count($tokens); $i < $size; $i++) {
81		- if ($this->_scriptFix && $tokens[$i]->name === 'script'
82		- && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
83		- // script special case
84		- // the contents of the script block must be ONE token
85		- // for this to work.
86		- $html .= $this->generateFromToken($tokens[$i++]);
87		- $html .= $this->generateScriptFromToken($tokens[$i++]);
88		- }
89		- $html .= $this->generateFromToken($tokens[$i]);
90		- }
91		-
92		- // Tidy cleanup
93		- if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
94		- $tidy = new Tidy;
95		- $tidy->parseString($html, array(
96		- 'indent'=> true,
97		- 'output-xhtml' => $this->_xhtml,
98		- 'show-body-only' => true,
99		- 'indent-spaces' => 2,
100		- 'wrap' => 68,
101		- ), 'utf8');
102		- $tidy->cleanRepair();
103		- $html = (string) $tidy; // explicit cast necessary
104		- }
105		-
106		- // Normalize newlines to system defined value
107		- if ($this->config->get('Core.NormalizeNewlines')) {
108		- $nl = $this->config->get('Output.Newline');
109		- if ($nl === null) $nl = PHP_EOL;
110		- if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
111		- }
112		- return $html;
113		- }
114		-
115		- /**
116		- * Generates HTML from a single token.
117		- * @param $token HTMLPurifier_Token object.
118		- * @return Generated HTML
119		- */
120		- public function generateFromToken($token) {
121		- if (!$token instanceof HTMLPurifier_Token) {
122		- trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
123		- return '';
124		-
125		- } elseif ($token instanceof HTMLPurifier_Token_Start) {
126		- $attr = $this->generateAttributes($token->attr, $token->name);
127		- if ($this->_flashCompat) {
128		- if ($token->name == "object") {
129		- $flash = new stdclass();
130		- $flash->attr = $token->attr;
131		- $flash->param = array();
132		- $this->_flashStack[] = $flash;
133		- }
134		- }
135		- return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
136		-
137		- } elseif ($token instanceof HTMLPurifier_Token_End) {
138		- $_extra = '';
139		- if ($this->_flashCompat) {
140		- if ($token->name == "object" && !empty($this->_flashStack)) {
141		- // doesn't do anything for now
142		- }
143		- }
144		- return $_extra . '</' . $token->name . '>';
145		-
146		- } elseif ($token instanceof HTMLPurifier_Token_Empty) {
147		- if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
148		- $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
149		- }
150		- $attr = $this->generateAttributes($token->attr, $token->name);
151		- return '<' . $token->name . ($attr ? ' ' : '') . $attr .
152		- ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
153		- . '>';
154		-
155		- } elseif ($token instanceof HTMLPurifier_Token_Text) {
156		- return $this->escape($token->data, ENT_NOQUOTES);
157		-
158		- } elseif ($token instanceof HTMLPurifier_Token_Comment) {
159		- return '<!--' . $token->data . '-->';
160		- } else {
161		- return '';
162		-
163		- }
164		- }
165		-
166		- /**
167		- * Special case processor for the contents of script tags
168		- * @warning This runs into problems if there's already a literal
169		- * --> somewhere inside the script contents.
170		- */
171		- public function generateScriptFromToken($token) {
172		- if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
173		- // Thanks <http://lachy.id.au/log/2005/05/script-comments>
174		- $data = preg_replace('#//\s*$#', '', $token->data);
175		- return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
176		- }
177		-
178		- /**
179		- * Generates attribute declarations from attribute array.
180		- * @note This does not include the leading or trailing space.
181		- * @param $assoc_array_of_attributes Attribute array
182		- * @param $element Name of element attributes are for, used to check
183		- * attribute minimization.
184		- * @return Generate HTML fragment for insertion.
185		- */
186		- public function generateAttributes($assoc_array_of_attributes, $element = false) {
187		- $html = '';
188		- if ($this->_sortAttr) ksort($assoc_array_of_attributes);
189		- foreach ($assoc_array_of_attributes as $key => $value) {
190		- if (!$this->_xhtml) {
191		- // Remove namespaced attributes
192		- if (strpos($key, ':') !== false) continue;
193		- // Check if we should minimize the attribute: val="val" -> val
194		- if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
195		- $html .= $key . ' ';
196		- continue;
197		- }
198		- }
199		- // Workaround for Internet Explorer innerHTML bug.
200		- // Essentially, Internet Explorer, when calculating
201		- // innerHTML, omits quotes if there are no instances of
202		- // angled brackets, quotes or spaces. However, when parsing
203		- // HTML (for example, when you assign to innerHTML), it
204		- // treats backticks as quotes. Thus,
205		- // <img alt="``" />
206		- // becomes
207		- // <img alt=`` />
208		- // becomes
209		- // <img alt='' />
210		- // Fortunately, all we need to do is trigger an appropriate
211		- // quoting style, which we do by adding an extra space.
212		- // This also is consistent with the W3C spec, which states
213		- // that user agents may ignore leading or trailing
214		- // whitespace (in fact, most don't, at least for attributes
215		- // like alt, but an extra space at the end is barely
216		- // noticeable). Still, we have a configuration knob for
217		- // this, since this transformation is not necesary if you
218		- // don't process user input with innerHTML or you don't plan
219		- // on supporting Internet Explorer.
220		- if ($this->_innerHTMLFix) {
221		- if (strpos($value, '`') !== false) {
222		- // check if correct quoting style would not already be
223		- // triggered
224		- if (strcspn($value, '"\' <>') === strlen($value)) {
225		- // protect!
226		- $value .= ' ';
227		- }
228		- }
229		- }
230		- $html .= $key.'="'.$this->escape($value).'" ';
231		- }
232		- return rtrim($html);
233		- }
234		-
235		- /**
236		- * Escapes raw text data.
237		- * @todo This really ought to be protected, but until we have a facility
238		- * for properly generating HTML here w/o using tokens, it stays
239		- * public.
240		- * @param $string String data to escape for HTML.
241		- * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
242		- * permissible for non-attribute output.
243		- * @return String escaped data.
244		- */
245		- public function escape($string, $quote = null) {
246		- // Workaround for APC bug on Mac Leopard reported by sidepodcast
247		- // http://htmlpurifier.org/phorum/read.php?3,4823,4846
248		- if ($quote === null) $quote = ENT_COMPAT;
249		- return htmlspecialchars($string, $quote, 'UTF-8', false);
250		- }
	13	+ /**
	14	+ * Whether or not generator should produce XML output
	15	+ */
	16	+ private $_xhtml = true;
	17	+
	18	+ /**
	19	+ * :HACK: Whether or not generator should comment the insides of <script> tags
	20	+ */
	21	+ private $_scriptFix = false;
	22	+
	23	+ /**
	24	+ * Cache of HTMLDefinition during HTML output to determine whether or
	25	+ * not attributes should be minimized.
	26	+ */
	27	+ private $_def;
	28	+
	29	+ /**
	30	+ * Cache of %Output.SortAttr
	31	+ */
	32	+ private $_sortAttr;
	33	+
	34	+ /**
	35	+ * Cache of %Output.FlashCompat
	36	+ */
	37	+ private $_flashCompat;
	38	+
	39	+ /**
	40	+ * Cache of %Output.FixInnerHTML
	41	+ */
	42	+ private $_innerHTMLFix;
	43	+
	44	+ /**
	45	+ * Stack for keeping track of object information when outputting IE
	46	+ * compatibility code.
	47	+ */
	48	+ private $_flashStack = array();
	49	+
	50	+ /**
	51	+ * Configuration for the generator
	52	+ */
	53	+ protected $config;
	54	+
	55	+ /**
	56	+ * @param $config Instance of HTMLPurifier_Config
	57	+ * @param $context Instance of HTMLPurifier_Context
	58	+ */
	59	+ public function __construct($config, $context) {
	60	+ $this->config = $config;
	61	+ $this->_scriptFix = $config->get('Output.CommentScriptContents');
	62	+ $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
	63	+ $this->_sortAttr = $config->get('Output.SortAttr');
	64	+ $this->_flashCompat = $config->get('Output.FlashCompat');
	65	+ $this->_def = $config->getHTMLDefinition();
	66	+ $this->_xhtml = $this->_def->doctype->xml;
	67	+ }
	68	+
	69	+ /**
	70	+ * Generates HTML from an array of tokens.
	71	+ * @param $tokens Array of HTMLPurifier_Token
	72	+ * @param $config HTMLPurifier_Config object
	73	+ * @return Generated HTML
	74	+ */
	75	+ public function generateFromTokens($tokens) {
	76	+ if (!$tokens) return '';
	77	+
	78	+ // Basic algorithm
	79	+ $html = '';
	80	+ for ($i = 0, $size = count($tokens); $i < $size; $i++) {
	81	+ if ($this->_scriptFix && $tokens[$i]->name === 'script'
	82	+ && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
	83	+ // script special case
	84	+ // the contents of the script block must be ONE token
	85	+ // for this to work.
	86	+ $html .= $this->generateFromToken($tokens[$i++]);
	87	+ $html .= $this->generateScriptFromToken($tokens[$i++]);
	88	+ }
	89	+ $html .= $this->generateFromToken($tokens[$i]);
	90	+ }
	91	+
	92	+ // Tidy cleanup
	93	+ if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
	94	+ $tidy = new Tidy;
	95	+ $tidy->parseString($html, array(
	96	+ 'indent'=> true,
	97	+ 'output-xhtml' => $this->_xhtml,
	98	+ 'show-body-only' => true,
	99	+ 'indent-spaces' => 2,
	100	+ 'wrap' => 68,
	101	+ ), 'utf8');
	102	+ $tidy->cleanRepair();
	103	+ $html = (string) $tidy; // explicit cast necessary
	104	+ }
	105	+
	106	+ // Normalize newlines to system defined value
	107	+ if ($this->config->get('Core.NormalizeNewlines')) {
	108	+ $nl = $this->config->get('Output.Newline');
	109	+ if ($nl === null) $nl = PHP_EOL;
	110	+ if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
	111	+ }
	112	+ return $html;
	113	+ }
	114	+
	115	+ /**
	116	+ * Generates HTML from a single token.
	117	+ * @param $token HTMLPurifier_Token object.
	118	+ * @return Generated HTML
	119	+ */
	120	+ public function generateFromToken($token) {
	121	+ if (!$token instanceof HTMLPurifier_Token) {
	122	+ trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
	123	+ return '';
	124	+
	125	+ } elseif ($token instanceof HTMLPurifier_Token_Start) {
	126	+ $attr = $this->generateAttributes($token->attr, $token->name);
	127	+ if ($this->_flashCompat) {
	128	+ if ($token->name == "object") {
	129	+ $flash = new stdclass();
	130	+ $flash->attr = $token->attr;
	131	+ $flash->param = array();
	132	+ $this->_flashStack[] = $flash;
	133	+ }
	134	+ }
	135	+ return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
	136	+
	137	+ } elseif ($token instanceof HTMLPurifier_Token_End) {
	138	+ $_extra = '';
	139	+ if ($this->_flashCompat) {
	140	+ if ($token->name == "object" && !empty($this->_flashStack)) {
	141	+ // doesn't do anything for now
	142	+ }
	143	+ }
	144	+ return $_extra . '</' . $token->name . '>';
	145	+
	146	+ } elseif ($token instanceof HTMLPurifier_Token_Empty) {
	147	+ if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
	148	+ $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
	149	+ }
	150	+ $attr = $this->generateAttributes($token->attr, $token->name);
	151	+ return '<' . $token->name . ($attr ? ' ' : '') . $attr .
	152	+ ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
	153	+ . '>';
	154	+
	155	+ } elseif ($token instanceof HTMLPurifier_Token_Text) {
	156	+ return $this->escape($token->data, ENT_NOQUOTES);
	157	+
	158	+ } elseif ($token instanceof HTMLPurifier_Token_Comment) {
	159	+ return '<!--' . $token->data . '-->';
	160	+ } else {
	161	+ return '';
	162	+
	163	+ }
	164	+ }
	165	+
	166	+ /**
	167	+ * Special case processor for the contents of script tags
	168	+ * @warning This runs into problems if there's already a literal
	169	+ * --> somewhere inside the script contents.
	170	+ */
	171	+ public function generateScriptFromToken($token) {
	172	+ if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
	173	+ // Thanks <http://lachy.id.au/log/2005/05/script-comments>
	174	+ $data = preg_replace('#//\s*$#', '', $token->data);
	175	+ return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
	176	+ }
	177	+
	178	+ /**
	179	+ * Generates attribute declarations from attribute array.
	180	+ * @note This does not include the leading or trailing space.
	181	+ * @param $assoc_array_of_attributes Attribute array
	182	+ * @param $element Name of element attributes are for, used to check
	183	+ * attribute minimization.
	184	+ * @return Generate HTML fragment for insertion.
	185	+ */
	186	+ public function generateAttributes($assoc_array_of_attributes, $element = false) {
	187	+ $html = '';
	188	+ if ($this->_sortAttr) ksort($assoc_array_of_attributes);
	189	+ foreach ($assoc_array_of_attributes as $key => $value) {
	190	+ if (!$this->_xhtml) {
	191	+ // Remove namespaced attributes
	192	+ if (strpos($key, ':') !== false) continue;
	193	+ // Check if we should minimize the attribute: val="val" -> val
	194	+ if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
	195	+ $html .= $key . ' ';
	196	+ continue;
	197	+ }
	198	+ }
	199	+ // Workaround for Internet Explorer innerHTML bug.
	200	+ // Essentially, Internet Explorer, when calculating
	201	+ // innerHTML, omits quotes if there are no instances of
	202	+ // angled brackets, quotes or spaces. However, when parsing
	203	+ // HTML (for example, when you assign to innerHTML), it
	204	+ // treats backticks as quotes. Thus,
	205	+ // <img alt="``" />
	206	+ // becomes
	207	+ // <img alt=`` />
	208	+ // becomes
	209	+ // <img alt='' />
	210	+ // Fortunately, all we need to do is trigger an appropriate
	211	+ // quoting style, which we do by adding an extra space.
	212	+ // This also is consistent with the W3C spec, which states
	213	+ // that user agents may ignore leading or trailing
	214	+ // whitespace (in fact, most don't, at least for attributes
	215	+ // like alt, but an extra space at the end is barely
	216	+ // noticeable). Still, we have a configuration knob for
	217	+ // this, since this transformation is not necesary if you
	218	+ // don't process user input with innerHTML or you don't plan
	219	+ // on supporting Internet Explorer.
	220	+ if ($this->_innerHTMLFix) {
	221	+ if (strpos($value, '`') !== false) {
	222	+ // check if correct quoting style would not already be
	223	+ // triggered
	224	+ if (strcspn($value, '"\' <>') === strlen($value)) {
	225	+ // protect!
	226	+ $value .= ' ';
	227	+ }
	228	+ }
	229	+ }
	230	+ $html .= $key.'="'.$this->escape($value).'" ';
	231	+ }
	232	+ return rtrim($html);
	233	+ }
	234	+
	235	+ /**
	236	+ * Escapes raw text data.
	237	+ * @todo This really ought to be protected, but until we have a facility
	238	+ * for properly generating HTML here w/o using tokens, it stays
	239	+ * public.
	240	+ * @param $string String data to escape for HTML.
	241	+ * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
	242	+ * permissible for non-attribute output.
	243	+ * @return String escaped data.
	244	+ */
	245	+ public function escape($string, $quote = null) {
	246	+ // Workaround for APC bug on Mac Leopard reported by sidepodcast
	247	+ // http://htmlpurifier.org/phorum/read.php?3,4823,4846
	248	+ if ($quote === null) $quote = ENT_COMPAT;
	249	+ return htmlspecialchars($string, $quote, 'UTF-8', false);
	250	+ }
251	251
252	252	}
253	253

		@@ -73,7 +73,9 @@ discard block
		block discarded – undo
73	73	* @return Generated HTML
74	74	*/
75	75	public function generateFromTokens($tokens) {
76		- if (!$tokens) return '';
	76	+ if (!$tokens) {
	77	+ return '';
	78	+ }
77	79
78	80	// Basic algorithm
79	81	$html = '';
		@@ -106,8 +108,12 @@ discard block
		block discarded – undo
106	108	// Normalize newlines to system defined value
107	109	if ($this->config->get('Core.NormalizeNewlines')) {
108	110	$nl = $this->config->get('Output.Newline');
109		- if ($nl === null) $nl = PHP_EOL;
110		- if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
	111	+ if ($nl === null) {
	112	+ $nl = PHP_EOL;
	113	+ }
	114	+ if ($nl !== "\n") {
	115	+ $html = str_replace("\n", $nl, $html);
	116	+ }
111	117	}
112	118	return $html;
113	119	}
		@@ -169,7 +175,9 @@ discard block
		block discarded – undo
169	175	* --> somewhere inside the script contents.
170	176	*/
171	177	public function generateScriptFromToken($token) {
172		- if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
	178	+ if (!$token instanceof HTMLPurifier_Token_Text) {
	179	+ return $this->generateFromToken($token);
	180	+ }
173	181	// Thanks <http://lachy.id.au/log/2005/05/script-comments>
174	182	$data = preg_replace('#//\s*$#', '', $token->data);
175	183	return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
		@@ -185,11 +193,15 @@ discard block
		block discarded – undo
185	193	*/
186	194	public function generateAttributes($assoc_array_of_attributes, $element = false) {
187	195	$html = '';
188		- if ($this->_sortAttr) ksort($assoc_array_of_attributes);
	196	+ if ($this->_sortAttr) {
	197	+ ksort($assoc_array_of_attributes);
	198	+ }
189	199	foreach ($assoc_array_of_attributes as $key => $value) {
190	200	if (!$this->_xhtml) {
191	201	// Remove namespaced attributes
192		- if (strpos($key, ':') !== false) continue;
	202	+ if (strpos($key, ':') !== false) {
	203	+ continue;
	204	+ }
193	205	// Check if we should minimize the attribute: val="val" -> val
194	206	if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
195	207	$html .= $key . ' ';
		@@ -245,7 +257,9 @@ discard block
		block discarded – undo
245	257	public function escape($string, $quote = null) {
246	258	// Workaround for APC bug on Mac Leopard reported by sidepodcast
247	259	// http://htmlpurifier.org/phorum/read.php?3,4823,4846
248		- if ($quote === null) $quote = ENT_COMPAT;
	260	+ if ($quote === null) {
	261	+ $quote = ENT_COMPAT;
	262	+ }
249	263	return htmlspecialchars($string, $quote, 'UTF-8', false);
250	264	}
251	265

		@@ -79,7 +79,7 @@ discard block
		block discarded – undo
79	79	$html = '';
80	80	for ($i = 0, $size = count($tokens); $i < $size; $i++) {
81	81	if ($this->_scriptFix && $tokens[$i]->name === 'script'
82		- && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
	82	+ && $i + 2 < $size && $tokens[$i + 2] instanceof HTMLPurifier_Token_End) {
83	83	// script special case
84	84	// the contents of the script block must be ONE token
85	85	// for this to work.
		@@ -132,7 +132,7 @@ discard block
		block discarded – undo
132	132	$this->_flashStack[] = $flash;
133	133	}
134	134	}
135		- return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
	135	+ return '<'.$token->name.($attr ? ' ' : '').$attr.'>';
136	136
137	137	} elseif ($token instanceof HTMLPurifier_Token_End) {
138	138	$_extra = '';
		@@ -141,22 +141,22 @@ discard block
		block discarded – undo
141	141	// doesn't do anything for now
142	142	}
143	143	}
144		- return $_extra . '</' . $token->name . '>';
	144	+ return $_extra.'</'.$token->name.'>';
145	145
146	146	} elseif ($token instanceof HTMLPurifier_Token_Empty) {
147	147	if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
148		- $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
	148	+ $this->_flashStack[count($this->_flashStack) - 1]->param[$token->attr['name']] = $token->attr['value'];
149	149	}
150	150	$attr = $this->generateAttributes($token->attr, $token->name);
151		- return '<' . $token->name . ($attr ? ' ' : '') . $attr .
152		- ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
	151	+ return '<'.$token->name.($attr ? ' ' : '').$attr.
	152	+ ($this->_xhtml ? ' /' : '') // <br /> v. <br>
153	153	. '>';
154	154
155	155	} elseif ($token instanceof HTMLPurifier_Token_Text) {
156	156	return $this->escape($token->data, ENT_NOQUOTES);
157	157
158	158	} elseif ($token instanceof HTMLPurifier_Token_Comment) {
159		- return '<!--' . $token->data . '-->';
	159	+ return '<!--'.$token->data.'-->';
160	160	} else {
161	161	return '';
162	162
		@@ -172,7 +172,7 @@ discard block
		block discarded – undo
172	172	if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
173	173	// Thanks <http://lachy.id.au/log/2005/05/script-comments>
174	174	$data = preg_replace('#//\s*$#', '', $token->data);
175		- return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
	175	+ return '<!--//--><![CDATA[//><!--'."\n".trim($data)."\n".'//--><!]]>';
176	176	}
177	177
178	178	/**
		@@ -192,7 +192,7 @@ discard block
		block discarded – undo
192	192	if (strpos($key, ':') !== false) continue;
193	193	// Check if we should minimize the attribute: val="val" -> val
194	194	if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
195		- $html .= $key . ' ';
	195	+ $html .= $key.' ';
196	196	continue;
197	197	}
198	198	}

		@@ -385,8 +385,7 @@
		block discarded – undo
385	385	* separate lists for processing. Format is element[attr1|attr2],element2...
386	386	* @warning Although it's largely drawn from TinyMCE's implementation,
387	387	* it is different, and you'll probably have to modify your lists
388		- * @param $list String list to parse
389		- * @param array($allowed_elements, $allowed_attributes)
	388	+ * @param string $list String list to parse
390	389	* @todo Give this its own class, probably static interface
391	390	*/
392	391	public function parseTinyMCEAllowedList($list) {

		@@ -192,20 +192,32 @@ discard block
		block discarded – undo
192	192
193	193	foreach ($this->manager->modules as $module) {
194	194	foreach($module->info_tag_transform as $k => $v) {
195		- if ($v === false) unset($this->info_tag_transform[$k]);
196		- else $this->info_tag_transform[$k] = $v;
	195	+ if ($v === false) {
	196	+ unset($this->info_tag_transform[$k]);
	197	+ } else {
	198	+ $this->info_tag_transform[$k] = $v;
	199	+ }
197	200	}
198	201	foreach($module->info_attr_transform_pre as $k => $v) {
199		- if ($v === false) unset($this->info_attr_transform_pre[$k]);
200		- else $this->info_attr_transform_pre[$k] = $v;
	202	+ if ($v === false) {
	203	+ unset($this->info_attr_transform_pre[$k]);
	204	+ } else {
	205	+ $this->info_attr_transform_pre[$k] = $v;
	206	+ }
201	207	}
202	208	foreach($module->info_attr_transform_post as $k => $v) {
203		- if ($v === false) unset($this->info_attr_transform_post[$k]);
204		- else $this->info_attr_transform_post[$k] = $v;
	209	+ if ($v === false) {
	210	+ unset($this->info_attr_transform_post[$k]);
	211	+ } else {
	212	+ $this->info_attr_transform_post[$k] = $v;
	213	+ }
205	214	}
206	215	foreach ($module->info_injector as $k => $v) {
207		- if ($v === false) unset($this->info_injector[$k]);
208		- else $this->info_injector[$k] = $v;
	216	+ if ($v === false) {
	217	+ unset($this->info_injector[$k]);
	218	+ } else {
	219	+ $this->info_injector[$k] = $v;
	220	+ }
209	221	}
210	222	}
211	223
		@@ -256,7 +268,9 @@ discard block
		block discarded – undo
256	268
257	269	if (is_array($allowed_elements)) {
258	270	foreach ($this->info as $name => $d) {
259		- if(!isset($allowed_elements[$name])) unset($this->info[$name]);
	271	+ if(!isset($allowed_elements[$name])) {
	272	+ unset($this->info[$name]);
	273	+ }
260	274	unset($allowed_elements[$name]);
261	275	}
262	276	// emit errors
		@@ -285,7 +299,9 @@ discard block
		block discarded – undo
285	299	unset($allowed_attributes_mutable[$key]);
286	300	}
287	301	}
288		- if ($delete) unset($this->info_global_attr[$attr]);
	302	+ if ($delete) {
	303	+ unset($this->info_global_attr[$attr]);
	304	+ }
289	305	}
290	306
291	307	foreach ($this->info as $tag => $info) {
		@@ -363,8 +379,12 @@ discard block
		block discarded – undo
363	379	}
364	380	}
365	381	foreach ($forbidden_attributes as $key => $v) {
366		- if (strlen($key) < 2) continue;
367		- if ($key[0] != '*') continue;
	382	+ if (strlen($key) < 2) {
	383	+ continue;
	384	+ }
	385	+ if ($key[0] != '*') {
	386	+ continue;
	387	+ }
368	388	if ($key[1] == '.') {
369	389	trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
370	390	}
		@@ -398,7 +418,9 @@ discard block
		block discarded – undo
398	418
399	419	$chunks = preg_split('/(,|[\n\r]+)/', $list);
400	420	foreach ($chunks as $chunk) {
401		- if (empty($chunk)) continue;
	421	+ if (empty($chunk)) {
	422	+ continue;
	423	+ }
402	424	// remove TinyMCE element control characters
403	425	if (!strpos($chunk, '[')) {
404	426	$element = $chunk;
		@@ -406,8 +428,12 @@ discard block
		block discarded – undo
406	428	} else {
407	429	list($element, $attr) = explode('[', $chunk);
408	430	}
409		- if ($element !== '*') $elements[$element] = true;
410		- if (!$attr) continue;
	431	+ if ($element !== '*') {
	432	+ $elements[$element] = true;
	433	+ }
	434	+ if (!$attr) {
	435	+ continue;
	436	+ }
411	437	$attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
412	438	$attr = explode('|', $attr);
413	439	foreach ($attr as $key) {

		@@ -191,15 +191,15 @@ discard block
		block discarded – undo
191	191	$this->doctype = $this->manager->doctype;
192	192
193	193	foreach ($this->manager->modules as $module) {
194		- foreach($module->info_tag_transform as $k => $v) {
	194	+ foreach ($module->info_tag_transform as $k => $v) {
195	195	if ($v === false) unset($this->info_tag_transform[$k]);
196	196	else $this->info_tag_transform[$k] = $v;
197	197	}
198		- foreach($module->info_attr_transform_pre as $k => $v) {
	198	+ foreach ($module->info_attr_transform_pre as $k => $v) {
199	199	if ($v === false) unset($this->info_attr_transform_pre[$k]);
200	200	else $this->info_attr_transform_pre[$k] = $v;
201	201	}
202		- foreach($module->info_attr_transform_post as $k => $v) {
	202	+ foreach ($module->info_attr_transform_post as $k => $v) {
203	203	if ($v === false) unset($this->info_attr_transform_post[$k]);
204	204	else $this->info_attr_transform_post[$k] = $v;
205	205	}
		@@ -256,7 +256,7 @@ discard block
		block discarded – undo
256	256
257	257	if (is_array($allowed_elements)) {
258	258	foreach ($this->info as $name => $d) {
259		- if(!isset($allowed_elements[$name])) unset($this->info[$name]);
	259	+ if (!isset($allowed_elements[$name])) unset($this->info[$name]);
260	260	unset($allowed_elements[$name]);
261	261	}
262	262	// emit errors

		@@ -179,6 +179,7 @@ discard block
		block discarded – undo
179	179	/**
180	180	* Adds a module to the current doctype by first registering it,
181	181	* and then tacking it on to the active doctype
	182	+ * @param HTMLPurifier_HTMLModule $module
182	183	*/
183	184	public function addModule($module) {
184	185	$this->registerModule($module);
		@@ -325,7 +326,7 @@ discard block
		block discarded – undo
325	326	/**
326	327	* Retrieves a single merged element definition
327	328	* @param $name Name of element
328		- * @param $trusted Boolean trusted overriding parameter: set to true
	329	+ * @param boolean $trusted Boolean trusted overriding parameter: set to true
329	330	* if you want the full version of an element
330	331	* @return Merged HTMLPurifier_ElementDef
331	332	* @note You may notice that modules are getting iterated over twice (once

		@@ -182,7 +182,9 @@ discard block
		block discarded – undo
182	182	*/
183	183	public function addModule($module) {
184	184	$this->registerModule($module);
185		- if (is_object($module)) $module = $module->name;
	185	+ if (is_object($module)) {
	186	+ $module = $module->name;
	187	+ }
186	188	$this->userModules[] = $module;
187	189	}
188	190
		@@ -213,8 +215,12 @@ discard block
		block discarded – undo
213	215
214	216	if (is_array($lookup)) {
215	217	foreach ($modules as $k => $m) {
216		- if (isset($special_cases[$m])) continue;
217		- if (!isset($lookup[$m])) unset($modules[$k]);
	218	+ if (isset($special_cases[$m])) {
	219	+ continue;
	220	+ }
	221	+ if (!isset($lookup[$m])) {
	222	+ unset($modules[$k]);
	223	+ }
218	224	}
219	225	}
220	226
		@@ -305,9 +311,13 @@ discard block
		block discarded – undo
305	311
306	312	$elements = array();
307	313	foreach ($this->modules as $module) {
308		- if (!$this->trusted && !$module->safe) continue;
	314	+ if (!$this->trusted && !$module->safe) {
	315	+ continue;
	316	+ }
309	317	foreach ($module->info as $name => $v) {
310		- if (isset($elements[$name])) continue;
	318	+ if (isset($elements[$name])) {
	319	+ continue;
	320	+ }
311	321	$elements[$name] = $this->getElement($name);
312	322	}
313	323	}
		@@ -315,7 +325,9 @@ discard block
		block discarded – undo
315	325	// remove dud elements, this happens when an element that
316	326	// appeared to be safe actually wasn't
317	327	foreach ($elements as $n => $v) {
318		- if ($v === false) unset($elements[$n]);
	328	+ if ($v === false) {
	329	+ unset($elements[$n]);
	330	+ }
319	331	}
320	332
321	333	return $elements;
		@@ -340,7 +352,9 @@ discard block
		block discarded – undo
340	352
341	353	// setup global state variables
342	354	$def = false;
343		- if ($trusted === null) $trusted = $this->trusted;
	355	+ if ($trusted === null) {
	356	+ $trusted = $this->trusted;
	357	+ }
344	358
345	359	// iterate through each module that has registered itself to this
346	360	// element
		@@ -397,7 +411,9 @@ discard block
		block discarded – undo
397	411
398	412	// This can occur if there is a blank definition, but no base to
399	413	// mix it in with
400		- if (!$def) return false;
	414	+ if (!$def) {
	415	+ return false;
	416	+ }
401	417
402	418	// add information on required attributes
403	419	foreach ($def->attr as $attr_name => $attr_def) {

		@@ -47,7 +47,7 @@ discard block
		block discarded – undo
47	47	/** List of prefixes we should use for registering small names */
48	48	public $prefixes = array('HTMLPurifier_HTMLModule_');
49	49
50		- public $contentSets; /**< Instance of HTMLPurifier_ContentSets */
	50	+ public $contentSets; /**< Instance of HTMLPurifier_ContentSets */
51	51	public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
52	52
53	53	/** If set to true, unsafe elements and attributes will be allowed */
		@@ -150,7 +150,7 @@ discard block
		block discarded – undo
150	150	$original_module = $module;
151	151	$ok = false;
152	152	foreach ($this->prefixes as $prefix) {
153		- $module = $prefix . $original_module;
	153	+ $module = $prefix.$original_module;
154	154	if (class_exists($module)) {
155	155	$ok = true;
156	156	break;
		@@ -159,7 +159,7 @@ discard block
		block discarded – undo
159	159	if (!$ok) {
160	160	$module = $original_module;
161	161	if (!class_exists($module)) {
162		- trigger_error($original_module . ' module does not exist',
	162	+ trigger_error($original_module.' module does not exist',
163	163	E_USER_ERROR);
164	164	return;
165	165	}
		@@ -167,11 +167,11 @@ discard block
		block discarded – undo
167	167	$module = new $module();
168	168	}
169	169	if (empty($module->name)) {
170		- trigger_error('Module instance of ' . get_class($module) . ' must have name');
	170	+ trigger_error('Module instance of '.get_class($module).' must have name');
171	171	return;
172	172	}
173	173	if (!$overload && isset($this->registeredModules[$module->name])) {
174		- trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
	174	+ trigger_error('Overloading '.$module->name.' without explicit overload parameter', E_USER_WARNING);
175	175	}
176	176	$this->registeredModules[$module->name] = $module;
177	177	}
		@@ -344,7 +344,7 @@ discard block
		block discarded – undo
344	344
345	345	// iterate through each module that has registered itself to this
346	346	// element
347		- foreach($this->elementLookup[$name] as $module_name) {
	347	+ foreach ($this->elementLookup[$name] as $module_name) {
348	348
349	349	$module = $this->modules[$module_name];
350	350

		@@ -19,7 +19,7 @@
		block discarded – undo
19	19	* Builds an IDAccumulator, also initializing the default blacklist
20	20	* @param $config Instance of HTMLPurifier_Config
21	21	* @param $context Instance of HTMLPurifier_Context
22		- * @return Fully initialized HTMLPurifier_IDAccumulator
	22	+ * @return HTMLPurifier_IDAccumulator initialized HTMLPurifier_IDAccumulator
23	23	*/
24	24	public static function build($config, $context) {
25	25	$id_accumulator = new HTMLPurifier_IDAccumulator();

		@@ -9,44 +9,44 @@
		block discarded – undo
9	9	class HTMLPurifier_IDAccumulator
10	10	{
11	11
12		- /**
13		- * Lookup table of IDs we've accumulated.
14		- * @public
15		- */
16		- public $ids = array();
	12	+ /**
	13	+ * Lookup table of IDs we've accumulated.
	14	+ * @public
	15	+ */
	16	+ public $ids = array();
17	17
18		- /**
19		- * Builds an IDAccumulator, also initializing the default blacklist
20		- * @param $config Instance of HTMLPurifier_Config
21		- * @param $context Instance of HTMLPurifier_Context
22		- * @return Fully initialized HTMLPurifier_IDAccumulator
23		- */
24		- public static function build($config, $context) {
25		- $id_accumulator = new HTMLPurifier_IDAccumulator();
26		- $id_accumulator->load($config->get('Attr.IDBlacklist'));
27		- return $id_accumulator;
28		- }
	18	+ /**
	19	+ * Builds an IDAccumulator, also initializing the default blacklist
	20	+ * @param $config Instance of HTMLPurifier_Config
	21	+ * @param $context Instance of HTMLPurifier_Context
	22	+ * @return Fully initialized HTMLPurifier_IDAccumulator
	23	+ */
	24	+ public static function build($config, $context) {
	25	+ $id_accumulator = new HTMLPurifier_IDAccumulator();
	26	+ $id_accumulator->load($config->get('Attr.IDBlacklist'));
	27	+ return $id_accumulator;
	28	+ }
29	29
30		- /**
31		- * Add an ID to the lookup table.
32		- * @param $id ID to be added.
33		- * @return Bool status, true if success, false if there's a dupe
34		- */
35		- public function add($id) {
36		- if (isset($this->ids[$id])) return false;
37		- return $this->ids[$id] = true;
38		- }
	30	+ /**
	31	+ * Add an ID to the lookup table.
	32	+ * @param $id ID to be added.
	33	+ * @return Bool status, true if success, false if there's a dupe
	34	+ */
	35	+ public function add($id) {
	36	+ if (isset($this->ids[$id])) return false;
	37	+ return $this->ids[$id] = true;
	38	+ }
39	39
40		- /**
41		- * Load a list of IDs into the lookup table
42		- * @param $array_of_ids Array of IDs to load
43		- * @note This function doesn't care about duplicates
44		- */
45		- public function load($array_of_ids) {
46		- foreach ($array_of_ids as $id) {
47		- $this->ids[$id] = true;
48		- }
49		- }
	40	+ /**
	41	+ * Load a list of IDs into the lookup table
	42	+ * @param $array_of_ids Array of IDs to load
	43	+ * @note This function doesn't care about duplicates
	44	+ */
	45	+ public function load($array_of_ids) {
	46	+ foreach ($array_of_ids as $id) {
	47	+ $this->ids[$id] = true;
	48	+ }
	49	+ }
50	50
51	51	}
52	52

		@@ -33,7 +33,9 @@
		block discarded – undo
33	33	* @return Bool status, true if success, false if there's a dupe
34	34	*/
35	35	public function add($id) {
36		- if (isset($this->ids[$id])) return false;
	36	+ if (isset($this->ids[$id])) {
	37	+ return false;
	38	+ }
37	39	return $this->ids[$id] = true;
38	40	}
39	41

xpressengine / xe-core

GitHub Access Token became invalid

Push — master ( b130b6...8a2f54 )

Status

Category

Doc Comments +5 added lines patch added patch discarded remove patch

Indentation +526 added lines, -526 removed lines patch added patch discarded remove patch

Braces +33 added lines, -11 removed lines patch added patch discarded remove patch

Spacing +21 added lines, -21 removed lines patch added patch discarded remove patch

Doc Comments +3 added lines patch added patch discarded remove patch

Indentation +196 added lines, -196 removed lines patch added patch discarded remove patch

Braces +15 added lines, -5 removed lines patch added patch discarded remove patch

Spacing +7 added lines, -7 removed lines patch added patch discarded remove patch

Doc Comments +4 added lines, -4 removed lines patch added patch discarded remove patch

Indentation +238 added lines, -238 removed lines patch added patch discarded remove patch

Braces +21 added lines, -7 removed lines patch added patch discarded remove patch

Spacing +9 added lines, -9 removed lines patch added patch discarded remove patch

Doc Comments +1 added lines, -2 removed lines patch added patch discarded remove patch

Indentation +392 added lines, -392 removed lines patch added patch discarded remove patch

Braces +41 added lines, -15 removed lines patch added patch discarded remove patch

Spacing +4 added lines, -4 removed lines patch added patch discarded remove patch

Doc Comments +2 added lines, -1 removed lines patch added patch discarded remove patch

Indentation +406 added lines, -406 removed lines patch added patch discarded remove patch

Braces +24 added lines, -8 removed lines patch added patch discarded remove patch

Spacing +6 added lines, -6 removed lines patch added patch discarded remove patch

Doc Comments +1 added lines, -1 removed lines patch added patch discarded remove patch

Indentation +35 added lines, -35 removed lines patch added patch discarded remove patch

Braces +3 added lines, -1 removed lines patch added patch discarded remove patch

Doc Comments +3 added lines, -2 removed lines patch added patch discarded remove patch

Indentation +216 added lines, -216 removed lines patch added patch discarded remove patch

Spacing +4 added lines, -4 removed lines patch added patch discarded remove patch

Braces +49 added lines, -18 removed lines patch added patch discarded remove patch

Doc Comments +1 added lines, -1 removed lines patch added patch discarded remove patch

Indentation +150 added lines, -150 removed lines patch added patch discarded remove patch

Braces +30 added lines, -10 removed lines patch added patch discarded remove patch

Spacing +1 added lines, -1 removed lines patch added patch discarded remove patch

Doc Comments +4 added lines, -1 removed lines patch added patch discarded remove patch

Indentation +471 added lines, -471 removed lines patch added patch discarded remove patch

Braces +43 added lines, -14 removed lines patch added patch discarded remove patch

Spacing +14 added lines, -14 removed lines patch added patch discarded remove patch

		@@ -64,6 +64,7 @@ discard block
		block discarded – undo
64	64	* result in infinite loops if not used carefully.
65	65	* @warning HTML Purifier will prevent you from fast-forwarding with this
66	66	* function.
	67	+ * @param integer $index
67	68	*/
68	69	public function rewind($index) {
69	70	$this->rewind = $index;
		@@ -123,8 +124,8 @@ discard block
		block discarded – undo
123	124
124	125	/**
125	126	* Tests if the context node allows a certain element
126		- * @param $name Name of element to test for
127		- * @return True if element is allowed, false if it is not
	127	+ * @param string $name Name of element to test for
	128	+ * @return boolean if element is allowed, false if it is not
128	129	*/
129	130	public function allowsElement($name) {
130	131	if (!empty($this->currentNesting)) {

		@@ -16,222 +16,222 @@
		block discarded – undo
16	16	abstract class HTMLPurifier_Injector
17	17	{
18	18
19		- /**
20		- * Advisory name of injector, this is for friendly error messages
21		- */
22		- public $name;
23		-
24		- /**
25		- * Instance of HTMLPurifier_HTMLDefinition
26		- */
27		- protected $htmlDefinition;
28		-
29		- /**
30		- * Reference to CurrentNesting variable in Context. This is an array
31		- * list of tokens that we are currently "inside"
32		- */
33		- protected $currentNesting;
34		-
35		- /**
36		- * Reference to InputTokens variable in Context. This is an array
37		- * list of the input tokens that are being processed.
38		- */
39		- protected $inputTokens;
40		-
41		- /**
42		- * Reference to InputIndex variable in Context. This is an integer
43		- * array index for $this->inputTokens that indicates what token
44		- * is currently being processed.
45		- */
46		- protected $inputIndex;
47		-
48		- /**
49		- * Array of elements and attributes this injector creates and therefore
50		- * need to be allowed by the definition. Takes form of
51		- * array('element' => array('attr', 'attr2'), 'element2')
52		- */
53		- public $needed = array();
54		-
55		- /**
56		- * Index of inputTokens to rewind to.
57		- */
58		- protected $rewind = false;
59		-
60		- /**
61		- * Rewind to a spot to re-perform processing. This is useful if you
62		- * deleted a node, and now need to see if this change affected any
63		- * earlier nodes. Rewinding does not affect other injectors, and can
64		- * result in infinite loops if not used carefully.
65		- * @warning HTML Purifier will prevent you from fast-forwarding with this
66		- * function.
67		- */
68		- public function rewind($index) {
69		- $this->rewind = $index;
70		- }
71		-
72		- /**
73		- * Retrieves rewind, and then unsets it.
74		- */
75		- public function getRewind() {
76		- $r = $this->rewind;
77		- $this->rewind = false;
78		- return $r;
79		- }
80		-
81		- /**
82		- * Prepares the injector by giving it the config and context objects:
83		- * this allows references to important variables to be made within
84		- * the injector. This function also checks if the HTML environment
85		- * will work with the Injector (see checkNeeded()).
86		- * @param $config Instance of HTMLPurifier_Config
87		- * @param $context Instance of HTMLPurifier_Context
88		- * @return Boolean false if success, string of missing needed element/attribute if failure
89		- */
90		- public function prepare($config, $context) {
91		- $this->htmlDefinition = $config->getHTMLDefinition();
92		- // Even though this might fail, some unit tests ignore this and
93		- // still test checkNeeded, so be careful. Maybe get rid of that
94		- // dependency.
95		- $result = $this->checkNeeded($config);
96		- if ($result !== false) return $result;
97		- $this->currentNesting =& $context->get('CurrentNesting');
98		- $this->inputTokens =& $context->get('InputTokens');
99		- $this->inputIndex =& $context->get('InputIndex');
100		- return false;
101		- }
102		-
103		- /**
104		- * This function checks if the HTML environment
105		- * will work with the Injector: if p tags are not allowed, the
106		- * Auto-Paragraphing injector should not be enabled.
107		- * @param $config Instance of HTMLPurifier_Config
108		- * @param $context Instance of HTMLPurifier_Context
109		- * @return Boolean false if success, string of missing needed element/attribute if failure
110		- */
111		- public function checkNeeded($config) {
112		- $def = $config->getHTMLDefinition();
113		- foreach ($this->needed as $element => $attributes) {
114		- if (is_int($element)) $element = $attributes;
115		- if (!isset($def->info[$element])) return $element;
116		- if (!is_array($attributes)) continue;
117		- foreach ($attributes as $name) {
118		- if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
119		- }
120		- }
121		- return false;
122		- }
123		-
124		- /**
125		- * Tests if the context node allows a certain element
126		- * @param $name Name of element to test for
127		- * @return True if element is allowed, false if it is not
128		- */
129		- public function allowsElement($name) {
130		- if (!empty($this->currentNesting)) {
131		- $parent_token = array_pop($this->currentNesting);
132		- $this->currentNesting[] = $parent_token;
133		- $parent = $this->htmlDefinition->info[$parent_token->name];
134		- } else {
135		- $parent = $this->htmlDefinition->info_parent_def;
136		- }
137		- if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
138		- return false;
139		- }
140		- // check for exclusion
141		- for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
142		- $node = $this->currentNesting[$i];
143		- $def = $this->htmlDefinition->info[$node->name];
144		- if (isset($def->excludes[$name])) return false;
145		- }
146		- return true;
147		- }
148		-
149		- /**
150		- * Iterator function, which starts with the next token and continues until
151		- * you reach the end of the input tokens.
152		- * @warning Please prevent previous references from interfering with this
153		- * functions by setting $i = null beforehand!
154		- * @param &$i Current integer index variable for inputTokens
155		- * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
156		- */
157		- protected function forward(&$i, &$current) {
158		- if ($i === null) $i = $this->inputIndex + 1;
159		- else $i++;
160		- if (!isset($this->inputTokens[$i])) return false;
161		- $current = $this->inputTokens[$i];
162		- return true;
163		- }
164		-
165		- /**
166		- * Similar to _forward, but accepts a third parameter $nesting (which
167		- * should be initialized at 0) and stops when we hit the end tag
168		- * for the node $this->inputIndex starts in.
169		- */
170		- protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
171		- $result = $this->forward($i, $current);
172		- if (!$result) return false;
173		- if ($nesting === null) $nesting = 0;
174		- if ($current instanceof HTMLPurifier_Token_Start) $nesting++;
175		- elseif ($current instanceof HTMLPurifier_Token_End) {
176		- if ($nesting <= 0) return false;
177		- $nesting--;
178		- }
179		- return true;
180		- }
181		-
182		- /**
183		- * Iterator function, starts with the previous token and continues until
184		- * you reach the beginning of input tokens.
185		- * @warning Please prevent previous references from interfering with this
186		- * functions by setting $i = null beforehand!
187		- * @param &$i Current integer index variable for inputTokens
188		- * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
189		- */
190		- protected function backward(&$i, &$current) {
191		- if ($i === null) $i = $this->inputIndex - 1;
192		- else $i--;
193		- if ($i < 0) return false;
194		- $current = $this->inputTokens[$i];
195		- return true;
196		- }
197		-
198		- /**
199		- * Initializes the iterator at the current position. Use in a do {} while;
200		- * loop to force the _forward and _backward functions to start at the
201		- * current location.
202		- * @warning Please prevent previous references from interfering with this
203		- * functions by setting $i = null beforehand!
204		- * @param &$i Current integer index variable for inputTokens
205		- * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
206		- */
207		- protected function current(&$i, &$current) {
208		- if ($i === null) $i = $this->inputIndex;
209		- $current = $this->inputTokens[$i];
210		- }
211		-
212		- /**
213		- * Handler that is called when a text token is processed
214		- */
215		- public function handleText(&$token) {}
216		-
217		- /**
218		- * Handler that is called when a start or empty token is processed
219		- */
220		- public function handleElement(&$token) {}
221		-
222		- /**
223		- * Handler that is called when an end token is processed
224		- */
225		- public function handleEnd(&$token) {
226		- $this->notifyEnd($token);
227		- }
228		-
229		- /**
230		- * Notifier that is called when an end token is processed
231		- * @note This differs from handlers in that the token is read-only
232		- * @deprecated
233		- */
234		- public function notifyEnd($token) {}
	19	+ /**
	20	+ * Advisory name of injector, this is for friendly error messages
	21	+ */
	22	+ public $name;
	23	+
	24	+ /**
	25	+ * Instance of HTMLPurifier_HTMLDefinition
	26	+ */
	27	+ protected $htmlDefinition;
	28	+
	29	+ /**
	30	+ * Reference to CurrentNesting variable in Context. This is an array
	31	+ * list of tokens that we are currently "inside"
	32	+ */
	33	+ protected $currentNesting;
	34	+
	35	+ /**
	36	+ * Reference to InputTokens variable in Context. This is an array
	37	+ * list of the input tokens that are being processed.
	38	+ */
	39	+ protected $inputTokens;
	40	+
	41	+ /**
	42	+ * Reference to InputIndex variable in Context. This is an integer
	43	+ * array index for $this->inputTokens that indicates what token
	44	+ * is currently being processed.
	45	+ */
	46	+ protected $inputIndex;
	47	+
	48	+ /**
	49	+ * Array of elements and attributes this injector creates and therefore
	50	+ * need to be allowed by the definition. Takes form of
	51	+ * array('element' => array('attr', 'attr2'), 'element2')
	52	+ */
	53	+ public $needed = array();
	54	+
	55	+ /**
	56	+ * Index of inputTokens to rewind to.
	57	+ */
	58	+ protected $rewind = false;
	59	+
	60	+ /**
	61	+ * Rewind to a spot to re-perform processing. This is useful if you
	62	+ * deleted a node, and now need to see if this change affected any
	63	+ * earlier nodes. Rewinding does not affect other injectors, and can
	64	+ * result in infinite loops if not used carefully.
	65	+ * @warning HTML Purifier will prevent you from fast-forwarding with this
	66	+ * function.
	67	+ */
	68	+ public function rewind($index) {
	69	+ $this->rewind = $index;
	70	+ }
	71	+
	72	+ /**
	73	+ * Retrieves rewind, and then unsets it.
	74	+ */
	75	+ public function getRewind() {
	76	+ $r = $this->rewind;
	77	+ $this->rewind = false;
	78	+ return $r;
	79	+ }
	80	+
	81	+ /**
	82	+ * Prepares the injector by giving it the config and context objects:
	83	+ * this allows references to important variables to be made within
	84	+ * the injector. This function also checks if the HTML environment
	85	+ * will work with the Injector (see checkNeeded()).
	86	+ * @param $config Instance of HTMLPurifier_Config
	87	+ * @param $context Instance of HTMLPurifier_Context
	88	+ * @return Boolean false if success, string of missing needed element/attribute if failure
	89	+ */
	90	+ public function prepare($config, $context) {
	91	+ $this->htmlDefinition = $config->getHTMLDefinition();
	92	+ // Even though this might fail, some unit tests ignore this and
	93	+ // still test checkNeeded, so be careful. Maybe get rid of that
	94	+ // dependency.
	95	+ $result = $this->checkNeeded($config);
	96	+ if ($result !== false) return $result;
	97	+ $this->currentNesting =& $context->get('CurrentNesting');
	98	+ $this->inputTokens =& $context->get('InputTokens');
	99	+ $this->inputIndex =& $context->get('InputIndex');
	100	+ return false;
	101	+ }
	102	+
	103	+ /**
	104	+ * This function checks if the HTML environment
	105	+ * will work with the Injector: if p tags are not allowed, the
	106	+ * Auto-Paragraphing injector should not be enabled.
	107	+ * @param $config Instance of HTMLPurifier_Config
	108	+ * @param $context Instance of HTMLPurifier_Context
	109	+ * @return Boolean false if success, string of missing needed element/attribute if failure
	110	+ */
	111	+ public function checkNeeded($config) {
	112	+ $def = $config->getHTMLDefinition();
	113	+ foreach ($this->needed as $element => $attributes) {
	114	+ if (is_int($element)) $element = $attributes;
	115	+ if (!isset($def->info[$element])) return $element;
	116	+ if (!is_array($attributes)) continue;
	117	+ foreach ($attributes as $name) {
	118	+ if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
	119	+ }
	120	+ }
	121	+ return false;
	122	+ }
	123	+
	124	+ /**
	125	+ * Tests if the context node allows a certain element
	126	+ * @param $name Name of element to test for
	127	+ * @return True if element is allowed, false if it is not
	128	+ */
	129	+ public function allowsElement($name) {
	130	+ if (!empty($this->currentNesting)) {
	131	+ $parent_token = array_pop($this->currentNesting);
	132	+ $this->currentNesting[] = $parent_token;
	133	+ $parent = $this->htmlDefinition->info[$parent_token->name];
	134	+ } else {
	135	+ $parent = $this->htmlDefinition->info_parent_def;
	136	+ }
	137	+ if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
	138	+ return false;
	139	+ }
	140	+ // check for exclusion
	141	+ for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
	142	+ $node = $this->currentNesting[$i];
	143	+ $def = $this->htmlDefinition->info[$node->name];
	144	+ if (isset($def->excludes[$name])) return false;
	145	+ }
	146	+ return true;
	147	+ }
	148	+
	149	+ /**
	150	+ * Iterator function, which starts with the next token and continues until
	151	+ * you reach the end of the input tokens.
	152	+ * @warning Please prevent previous references from interfering with this
	153	+ * functions by setting $i = null beforehand!
	154	+ * @param &$i Current integer index variable for inputTokens
	155	+ * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
	156	+ */
	157	+ protected function forward(&$i, &$current) {
	158	+ if ($i === null) $i = $this->inputIndex + 1;
	159	+ else $i++;
	160	+ if (!isset($this->inputTokens[$i])) return false;
	161	+ $current = $this->inputTokens[$i];
	162	+ return true;
	163	+ }
	164	+
	165	+ /**
	166	+ * Similar to _forward, but accepts a third parameter $nesting (which
	167	+ * should be initialized at 0) and stops when we hit the end tag
	168	+ * for the node $this->inputIndex starts in.
	169	+ */
	170	+ protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
	171	+ $result = $this->forward($i, $current);
	172	+ if (!$result) return false;
	173	+ if ($nesting === null) $nesting = 0;
	174	+ if ($current instanceof HTMLPurifier_Token_Start) $nesting++;
	175	+ elseif ($current instanceof HTMLPurifier_Token_End) {
	176	+ if ($nesting <= 0) return false;
	177	+ $nesting--;
	178	+ }
	179	+ return true;
	180	+ }
	181	+
	182	+ /**
	183	+ * Iterator function, starts with the previous token and continues until
	184	+ * you reach the beginning of input tokens.
	185	+ * @warning Please prevent previous references from interfering with this
	186	+ * functions by setting $i = null beforehand!
	187	+ * @param &$i Current integer index variable for inputTokens
	188	+ * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
	189	+ */
	190	+ protected function backward(&$i, &$current) {
	191	+ if ($i === null) $i = $this->inputIndex - 1;
	192	+ else $i--;
	193	+ if ($i < 0) return false;
	194	+ $current = $this->inputTokens[$i];
	195	+ return true;
	196	+ }
	197	+
	198	+ /**
	199	+ * Initializes the iterator at the current position. Use in a do {} while;
	200	+ * loop to force the _forward and _backward functions to start at the
	201	+ * current location.
	202	+ * @warning Please prevent previous references from interfering with this
	203	+ * functions by setting $i = null beforehand!
	204	+ * @param &$i Current integer index variable for inputTokens
	205	+ * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
	206	+ */
	207	+ protected function current(&$i, &$current) {
	208	+ if ($i === null) $i = $this->inputIndex;
	209	+ $current = $this->inputTokens[$i];
	210	+ }
	211	+
	212	+ /**
	213	+ * Handler that is called when a text token is processed
	214	+ */
	215	+ public function handleText(&$token) {}
	216	+
	217	+ /**
	218	+ * Handler that is called when a start or empty token is processed
	219	+ */
	220	+ public function handleElement(&$token) {}
	221	+
	222	+ /**
	223	+ * Handler that is called when an end token is processed
	224	+ */
	225	+ public function handleEnd(&$token) {
	226	+ $this->notifyEnd($token);
	227	+ }
	228	+
	229	+ /**
	230	+ * Notifier that is called when an end token is processed
	231	+ * @note This differs from handlers in that the token is read-only
	232	+ * @deprecated
	233	+ */
	234	+ public function notifyEnd($token) {}
235	235
236	236
237	237	}

		@@ -94,9 +94,9 @@ discard block
		block discarded – undo
94	94	// dependency.
95	95	$result = $this->checkNeeded($config);
96	96	if ($result !== false) return $result;
97		- $this->currentNesting =& $context->get('CurrentNesting');
98		- $this->inputTokens =& $context->get('InputTokens');
99		- $this->inputIndex =& $context->get('InputIndex');
	97	+ $this->currentNesting = & $context->get('CurrentNesting');
	98	+ $this->inputTokens = & $context->get('InputTokens');
	99	+ $this->inputIndex = & $context->get('InputIndex');
100	100	return false;
101	101	}
102	102
		@@ -171,7 +171,7 @@ discard block
		block discarded – undo
171	171	$result = $this->forward($i, $current);
172	172	if (!$result) return false;
173	173	if ($nesting === null) $nesting = 0;
174		- if ($current instanceof HTMLPurifier_Token_Start) $nesting++;
	174	+ if ($current instanceof HTMLPurifier_Token_Start) $nesting++;
175	175	elseif ($current instanceof HTMLPurifier_Token_End) {
176	176	if ($nesting <= 0) return false;
177	177	$nesting--;

		@@ -93,7 +93,9 @@ discard block
		block discarded – undo
93	93	// still test checkNeeded, so be careful. Maybe get rid of that
94	94	// dependency.
95	95	$result = $this->checkNeeded($config);
96		- if ($result !== false) return $result;
	96	+ if ($result !== false) {
	97	+ return $result;
	98	+ }
97	99	$this->currentNesting =& $context->get('CurrentNesting');
98	100	$this->inputTokens =& $context->get('InputTokens');
99	101	$this->inputIndex =& $context->get('InputIndex');
		@@ -111,11 +113,19 @@ discard block
		block discarded – undo
111	113	public function checkNeeded($config) {
112	114	$def = $config->getHTMLDefinition();
113	115	foreach ($this->needed as $element => $attributes) {
114		- if (is_int($element)) $element = $attributes;
115		- if (!isset($def->info[$element])) return $element;
116		- if (!is_array($attributes)) continue;
	116	+ if (is_int($element)) {
	117	+ $element = $attributes;
	118	+ }
	119	+ if (!isset($def->info[$element])) {
	120	+ return $element;
	121	+ }
	122	+ if (!is_array($attributes)) {
	123	+ continue;
	124	+ }
117	125	foreach ($attributes as $name) {
118		- if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
	126	+ if (!isset($def->info[$element]->attr[$name])) {
	127	+ return "$element.$name";
	128	+ }
119	129	}
120	130	}
121	131	return false;
		@@ -141,7 +151,9 @@ discard block
		block discarded – undo
141	151	for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
142	152	$node = $this->currentNesting[$i];
143	153	$def = $this->htmlDefinition->info[$node->name];
144		- if (isset($def->excludes[$name])) return false;
	154	+ if (isset($def->excludes[$name])) {
	155	+ return false;
	156	+ }
145	157	}
146	158	return true;
147	159	}
		@@ -155,9 +167,14 @@ discard block
		block discarded – undo
155	167	* @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
156	168	*/
157	169	protected function forward(&$i, &$current) {
158		- if ($i === null) $i = $this->inputIndex + 1;
159		- else $i++;
160		- if (!isset($this->inputTokens[$i])) return false;
	170	+ if ($i === null) {
	171	+ $i = $this->inputIndex + 1;
	172	+ } else {
	173	+ $i++;
	174	+ }
	175	+ if (!isset($this->inputTokens[$i])) {
	176	+ return false;
	177	+ }
161	178	$current = $this->inputTokens[$i];
162	179	return true;
163	180	}
		@@ -169,11 +186,18 @@ discard block
		block discarded – undo
169	186	*/
170	187	protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
171	188	$result = $this->forward($i, $current);
172		- if (!$result) return false;
173		- if ($nesting === null) $nesting = 0;
174		- if ($current instanceof HTMLPurifier_Token_Start) $nesting++;
175		- elseif ($current instanceof HTMLPurifier_Token_End) {
176		- if ($nesting <= 0) return false;
	189	+ if (!$result) {
	190	+ return false;
	191	+ }
	192	+ if ($nesting === null) {
	193	+ $nesting = 0;
	194	+ }
	195	+ if ($current instanceof HTMLPurifier_Token_Start) {
	196	+ $nesting++;
	197	+ } elseif ($current instanceof HTMLPurifier_Token_End) {
	198	+ if ($nesting <= 0) {
	199	+ return false;
	200	+ }
177	201	$nesting--;
178	202	}
179	203	return true;
		@@ -188,9 +212,14 @@ discard block
		block discarded – undo
188	212	* @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
189	213	*/
190	214	protected function backward(&$i, &$current) {
191		- if ($i === null) $i = $this->inputIndex - 1;
192		- else $i--;
193		- if ($i < 0) return false;
	215	+ if ($i === null) {
	216	+ $i = $this->inputIndex - 1;
	217	+ } else {
	218	+ $i--;
	219	+ }
	220	+ if ($i < 0) {
	221	+ return false;
	222	+ }
194	223	$current = $this->inputTokens[$i];
195	224	return true;
196	225	}
		@@ -205,7 +234,9 @@ discard block
		block discarded – undo
205	234	* @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
206	235	*/
207	236	protected function current(&$i, &$current) {
208		- if ($i === null) $i = $this->inputIndex;
	237	+ if ($i === null) {
	238	+ $i = $this->inputIndex;
	239	+ }
209	240	$current = $this->inputTokens[$i];
210	241	}
211	242

		@@ -66,7 +66,7 @@
		block discarded – undo
66	66
67	67	/**
68	68	* Retrieves a localised message.
69		- * @param $key string identifier of message
	69	+ * @param string $key string identifier of message
70	70	* @return string localised message
71	71	*/
72	72	public function getMessage($key) {

		@@ -7,156 +7,156 @@
		block discarded – undo
7	7	class HTMLPurifier_Language
8	8	{
9	9
10		- /**
11		- * ISO 639 language code of language. Prefers shortest possible version
12		- */
13		- public $code = 'en';
14		-
15		- /**
16		- * Fallback language code
17		- */
18		- public $fallback = false;
19		-
20		- /**
21		- * Array of localizable messages
22		- */
23		- public $messages = array();
24		-
25		- /**
26		- * Array of localizable error codes
27		- */
28		- public $errorNames = array();
29		-
30		- /**
31		- * True if no message file was found for this language, so English
32		- * is being used instead. Check this if you'd like to notify the
33		- * user that they've used a non-supported language.
34		- */
35		- public $error = false;
36		-
37		- /**
38		- * Has the language object been loaded yet?
39		- * @todo Make it private, fix usage in HTMLPurifier_LanguageTest
40		- */
41		- public $_loaded = false;
42		-
43		- /**
44		- * Instances of HTMLPurifier_Config and HTMLPurifier_Context
45		- */
46		- protected $config, $context;
47		-
48		- public function __construct($config, $context) {
49		- $this->config = $config;
50		- $this->context = $context;
51		- }
52		-
53		- /**
54		- * Loads language object with necessary info from factory cache
55		- * @note This is a lazy loader
56		- */
57		- public function load() {
58		- if ($this->_loaded) return;
59		- $factory = HTMLPurifier_LanguageFactory::instance();
60		- $factory->loadLanguage($this->code);
61		- foreach ($factory->keys as $key) {
62		- $this->$key = $factory->cache[$this->code][$key];
63		- }
64		- $this->_loaded = true;
65		- }
66		-
67		- /**
68		- * Retrieves a localised message.
69		- * @param $key string identifier of message
70		- * @return string localised message
71		- */
72		- public function getMessage($key) {
73		- if (!$this->_loaded) $this->load();
74		- if (!isset($this->messages[$key])) return "[$key]";
75		- return $this->messages[$key];
76		- }
77		-
78		- /**
79		- * Retrieves a localised error name.
80		- * @param $int integer error number, corresponding to PHP's error
81		- * reporting
82		- * @return string localised message
83		- */
84		- public function getErrorName($int) {
85		- if (!$this->_loaded) $this->load();
86		- if (!isset($this->errorNames[$int])) return "[Error: $int]";
87		- return $this->errorNames[$int];
88		- }
89		-
90		- /**
91		- * Converts an array list into a string readable representation
92		- */
93		- public function listify($array) {
94		- $sep = $this->getMessage('Item separator');
95		- $sep_last = $this->getMessage('Item separator last');
96		- $ret = '';
97		- for ($i = 0, $c = count($array); $i < $c; $i++) {
98		- if ($i == 0) {
99		- } elseif ($i + 1 < $c) {
100		- $ret .= $sep;
101		- } else {
102		- $ret .= $sep_last;
103		- }
104		- $ret .= $array[$i];
105		- }
106		- return $ret;
107		- }
108		-
109		- /**
110		- * Formats a localised message with passed parameters
111		- * @param $key string identifier of message
112		- * @param $args Parameters to substitute in
113		- * @return string localised message
114		- * @todo Implement conditionals? Right now, some messages make
115		- * reference to line numbers, but those aren't always available
116		- */
117		- public function formatMessage($key, $args = array()) {
118		- if (!$this->_loaded) $this->load();
119		- if (!isset($this->messages[$key])) return "[$key]";
120		- $raw = $this->messages[$key];
121		- $subst = array();
122		- $generator = false;
123		- foreach ($args as $i => $value) {
124		- if (is_object($value)) {
125		- if ($value instanceof HTMLPurifier_Token) {
126		- // factor this out some time
127		- if (!$generator) $generator = $this->context->get('Generator');
128		- if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
129		- if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
130		- $subst['$'.$i.'.Compact'] =
131		- $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
132		- // a more complex algorithm for compact representation
133		- // could be introduced for all types of tokens. This
134		- // may need to be factored out into a dedicated class
135		- if (!empty($value->attr)) {
136		- $stripped_token = clone $value;
137		- $stripped_token->attr = array();
138		- $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
139		- }
140		- $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
141		- }
142		- continue;
143		- } elseif (is_array($value)) {
144		- $keys = array_keys($value);
145		- if (array_keys($keys) === $keys) {
146		- // list
147		- $subst['$'.$i] = $this->listify($value);
148		- } else {
149		- // associative array
150		- // no $i implementation yet, sorry
151		- $subst['$'.$i.'.Keys'] = $this->listify($keys);
152		- $subst['$'.$i.'.Values'] = $this->listify(array_values($value));
153		- }
154		- continue;
155		- }
156		- $subst['$' . $i] = $value;
157		- }
158		- return strtr($raw, $subst);
159		- }
	10	+ /**
	11	+ * ISO 639 language code of language. Prefers shortest possible version
	12	+ */
	13	+ public $code = 'en';
	14	+
	15	+ /**
	16	+ * Fallback language code
	17	+ */
	18	+ public $fallback = false;
	19	+
	20	+ /**
	21	+ * Array of localizable messages
	22	+ */
	23	+ public $messages = array();
	24	+
	25	+ /**
	26	+ * Array of localizable error codes
	27	+ */
	28	+ public $errorNames = array();
	29	+
	30	+ /**
	31	+ * True if no message file was found for this language, so English
	32	+ * is being used instead. Check this if you'd like to notify the
	33	+ * user that they've used a non-supported language.
	34	+ */
	35	+ public $error = false;
	36	+
	37	+ /**
	38	+ * Has the language object been loaded yet?
	39	+ * @todo Make it private, fix usage in HTMLPurifier_LanguageTest
	40	+ */
	41	+ public $_loaded = false;
	42	+
	43	+ /**
	44	+ * Instances of HTMLPurifier_Config and HTMLPurifier_Context
	45	+ */
	46	+ protected $config, $context;
	47	+
	48	+ public function __construct($config, $context) {
	49	+ $this->config = $config;
	50	+ $this->context = $context;
	51	+ }
	52	+
	53	+ /**
	54	+ * Loads language object with necessary info from factory cache
	55	+ * @note This is a lazy loader
	56	+ */
	57	+ public function load() {
	58	+ if ($this->_loaded) return;
	59	+ $factory = HTMLPurifier_LanguageFactory::instance();
	60	+ $factory->loadLanguage($this->code);
	61	+ foreach ($factory->keys as $key) {
	62	+ $this->$key = $factory->cache[$this->code][$key];
	63	+ }
	64	+ $this->_loaded = true;
	65	+ }
	66	+
	67	+ /**
	68	+ * Retrieves a localised message.
	69	+ * @param $key string identifier of message
	70	+ * @return string localised message
	71	+ */
	72	+ public function getMessage($key) {
	73	+ if (!$this->_loaded) $this->load();
	74	+ if (!isset($this->messages[$key])) return "[$key]";
	75	+ return $this->messages[$key];
	76	+ }
	77	+
	78	+ /**
	79	+ * Retrieves a localised error name.
	80	+ * @param $int integer error number, corresponding to PHP's error
	81	+ * reporting
	82	+ * @return string localised message
	83	+ */
	84	+ public function getErrorName($int) {
	85	+ if (!$this->_loaded) $this->load();
	86	+ if (!isset($this->errorNames[$int])) return "[Error: $int]";
	87	+ return $this->errorNames[$int];
	88	+ }
	89	+
	90	+ /**
	91	+ * Converts an array list into a string readable representation
	92	+ */
	93	+ public function listify($array) {
	94	+ $sep = $this->getMessage('Item separator');
	95	+ $sep_last = $this->getMessage('Item separator last');
	96	+ $ret = '';
	97	+ for ($i = 0, $c = count($array); $i < $c; $i++) {
	98	+ if ($i == 0) {
	99	+ } elseif ($i + 1 < $c) {
	100	+ $ret .= $sep;
	101	+ } else {
	102	+ $ret .= $sep_last;
	103	+ }
	104	+ $ret .= $array[$i];
	105	+ }
	106	+ return $ret;
	107	+ }
	108	+
	109	+ /**
	110	+ * Formats a localised message with passed parameters
	111	+ * @param $key string identifier of message
	112	+ * @param $args Parameters to substitute in
	113	+ * @return string localised message
	114	+ * @todo Implement conditionals? Right now, some messages make
	115	+ * reference to line numbers, but those aren't always available
	116	+ */
	117	+ public function formatMessage($key, $args = array()) {
	118	+ if (!$this->_loaded) $this->load();
	119	+ if (!isset($this->messages[$key])) return "[$key]";
	120	+ $raw = $this->messages[$key];
	121	+ $subst = array();
	122	+ $generator = false;
	123	+ foreach ($args as $i => $value) {
	124	+ if (is_object($value)) {
	125	+ if ($value instanceof HTMLPurifier_Token) {
	126	+ // factor this out some time
	127	+ if (!$generator) $generator = $this->context->get('Generator');
	128	+ if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
	129	+ if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
	130	+ $subst['$'.$i.'.Compact'] =
	131	+ $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
	132	+ // a more complex algorithm for compact representation
	133	+ // could be introduced for all types of tokens. This
	134	+ // may need to be factored out into a dedicated class
	135	+ if (!empty($value->attr)) {
	136	+ $stripped_token = clone $value;
	137	+ $stripped_token->attr = array();
	138	+ $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
	139	+ }
	140	+ $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
	141	+ }
	142	+ continue;
	143	+ } elseif (is_array($value)) {
	144	+ $keys = array_keys($value);
	145	+ if (array_keys($keys) === $keys) {
	146	+ // list
	147	+ $subst['$'.$i] = $this->listify($value);
	148	+ } else {
	149	+ // associative array
	150	+ // no $i implementation yet, sorry
	151	+ $subst['$'.$i.'.Keys'] = $this->listify($keys);
	152	+ $subst['$'.$i.'.Values'] = $this->listify(array_values($value));
	153	+ }
	154	+ continue;
	155	+ }
	156	+ $subst['$' . $i] = $value;
	157	+ }
	158	+ return strtr($raw, $subst);
	159	+ }
160	160
161	161	}
162	162

		@@ -55,7 +55,9 @@ discard block
		block discarded – undo
55	55	* @note This is a lazy loader
56	56	*/
57	57	public function load() {
58		- if ($this->_loaded) return;
	58	+ if ($this->_loaded) {
	59	+ return;
	60	+ }
59	61	$factory = HTMLPurifier_LanguageFactory::instance();
60	62	$factory->loadLanguage($this->code);
61	63	foreach ($factory->keys as $key) {
		@@ -70,8 +72,12 @@ discard block
		block discarded – undo
70	72	* @return string localised message
71	73	*/
72	74	public function getMessage($key) {
73		- if (!$this->_loaded) $this->load();
74		- if (!isset($this->messages[$key])) return "[$key]";
	75	+ if (!$this->_loaded) {
	76	+ $this->load();
	77	+ }
	78	+ if (!isset($this->messages[$key])) {
	79	+ return "[$key]";
	80	+ }
75	81	return $this->messages[$key];
76	82	}
77	83
		@@ -82,8 +88,12 @@ discard block
		block discarded – undo
82	88	* @return string localised message
83	89	*/
84	90	public function getErrorName($int) {
85		- if (!$this->_loaded) $this->load();
86		- if (!isset($this->errorNames[$int])) return "[Error: $int]";
	91	+ if (!$this->_loaded) {
	92	+ $this->load();
	93	+ }
	94	+ if (!isset($this->errorNames[$int])) {
	95	+ return "[Error: $int]";
	96	+ }
87	97	return $this->errorNames[$int];
88	98	}
89	99
		@@ -115,8 +125,12 @@ discard block
		block discarded – undo
115	125	* reference to line numbers, but those aren't always available
116	126	*/
117	127	public function formatMessage($key, $args = array()) {
118		- if (!$this->_loaded) $this->load();
119		- if (!isset($this->messages[$key])) return "[$key]";
	128	+ if (!$this->_loaded) {
	129	+ $this->load();
	130	+ }
	131	+ if (!isset($this->messages[$key])) {
	132	+ return "[$key]";
	133	+ }
120	134	$raw = $this->messages[$key];
121	135	$subst = array();
122	136	$generator = false;
		@@ -124,9 +138,15 @@ discard block
		block discarded – undo
124	138	if (is_object($value)) {
125	139	if ($value instanceof HTMLPurifier_Token) {
126	140	// factor this out some time
127		- if (!$generator) $generator = $this->context->get('Generator');
128		- if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
129		- if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
	141	+ if (!$generator) {
	142	+ $generator = $this->context->get('Generator');
	143	+ }
	144	+ if (isset($value->name)) {
	145	+ $subst['$'.$i.'.Name'] = $value->name;
	146	+ }
	147	+ if (isset($value->data)) {
	148	+ $subst['$'.$i.'.Data'] = $value->data;
	149	+ }
130	150	$subst['$'.$i.'.Compact'] =
131	151	$subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
132	152	// a more complex algorithm for compact representation

		@@ -153,7 +153,7 @@
		block discarded – undo
153	153	}
154	154	continue;
155	155	}
156		- $subst['$' . $i] = $value;
	156	+ $subst['$'.$i] = $value;
157	157	}
158	158	return strtr($raw, $subst);
159	159	}

		@@ -319,6 +319,9 @@ discard block
		block discarded – undo
319	319
320	320	/**
321	321	* PHP 5.0.x compatible substr_count that implements offset and length
	322	+ * @param string $needle
	323	+ * @param integer $offset
	324	+ * @param integer $length
322	325	*/
323	326	protected function substrCount($haystack, $needle, $offset, $length) {
324	327	static $oldVersion;
		@@ -336,7 +339,7 @@ discard block
		block discarded – undo
336	339	/**
337	340	* Takes the inside of an HTML tag and makes an assoc array of attributes.
338	341	*
339		- * @param $string Inside of tag excluding name.
	342	+ * @param string $string Inside of tag excluding name.
340	343	* @returns Assoc array of attributes.
341	344	*/
342	345	public function parseAttributeString($string, $config, $context) {

		@@ -137,7 +137,9 @@ discard block
		block discarded – undo
137	137	} elseif (!$inside_tag) {
138	138	// We are not inside tag but there are no more tags
139	139	// If we're already at the end, break
140		- if ($cursor === strlen($html)) break;
	140	+ if ($cursor === strlen($html)) {
	141	+ break;
	142	+ }
141	143	// Create Text of rest of string
142	144	$token = new
143	145	HTMLPurifier_Token_Text(
		@@ -147,7 +149,9 @@ discard block
		block discarded – undo
147	149	)
148	150	)
149	151	);
150		- if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
	152	+ if ($maintain_line_numbers) {
	153	+ $token->rawPosition($current_line, $current_col);
	154	+ }
151	155	$array[] = $token;
152	156	break;
153	157	} elseif ($inside_tag && $position_next_gt !== false) {
		@@ -180,7 +184,9 @@ discard block
		block discarded – undo
180	184	// uh oh, we have a comment that extends to
181	185	// infinity. Can't be helped: set comment
182	186	// end position to end of string
183		- if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
	187	+ if ($e) {
	188	+ $e->send(E_WARNING, 'Lexer: Unclosed comment');
	189	+ }
184	190	$position_comment_end = strlen($html);
185	191	$end = true;
186	192	} else {
		@@ -224,7 +230,9 @@ discard block
		block discarded – undo
224	230	// text and go our merry way
225	231	if (!ctype_alpha($segment[0])) {
226	232	// XML: $segment[0] !== '_' && $segment[0] !== ':'
227		- if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
	233	+ if ($e) {
	234	+ $e->send(E_NOTICE, 'Lexer: Unescaped lt');
	235	+ }
228	236	$token = new HTMLPurifier_Token_Text('<');
229	237	if ($maintain_line_numbers) {
230	238	$token->rawPosition($current_line, $current_col);
		@@ -296,7 +304,9 @@ discard block
		block discarded – undo
296	304	continue;
297	305	} else {
298	306	// inside tag, but there's no ending > sign
299		- if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
	307	+ if ($e) {
	308	+ $e->send(E_WARNING, 'Lexer: Missing gt');
	309	+ }
300	310	$token = new
301	311	HTMLPurifier_Token_Text(
302	312	'<' .
		@@ -304,7 +314,9 @@ discard block
		block discarded – undo
304	314	substr($html, $cursor)
305	315	)
306	316	);
307		- if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
	317	+ if ($maintain_line_numbers) {
	318	+ $token->rawPosition($current_line, $current_col);
	319	+ }
308	320	// no cursor scroll? Hmm...
309	321	$array[] = $token;
310	322	break;
		@@ -342,7 +354,10 @@ discard block
		block discarded – undo
342	354	public function parseAttributeString($string, $config, $context) {
343	355	$string = (string) $string; // quick typecast
344	356
345		- if ($string == '') return array(); // no attributes
	357	+ if ($string == '') {
	358	+ return array();
	359	+ }
	360	+ // no attributes
346	361
347	362	$e = false;
348	363	if ($config->get('Core.CollectErrors')) {
		@@ -361,10 +376,14 @@ discard block
		block discarded – undo
361	376	list($key, $quoted_value) = explode('=', $string);
362	377	$quoted_value = trim($quoted_value);
363	378	if (!$key) {
364		- if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
	379	+ if ($e) {
	380	+ $e->send(E_ERROR, 'Lexer: Missing attribute key');
	381	+ }
365	382	return array();
366	383	}
367		- if (!$quoted_value) return array($key => '');
	384	+ if (!$quoted_value) {
	385	+ return array($key => '');
	386	+ }
368	387	$first_char = @$quoted_value[0];
369	388	$last_char = @$quoted_value[strlen($quoted_value)-1];
370	389
		@@ -377,13 +396,17 @@ discard block
		block discarded – undo
377	396	} else {
378	397	// not well behaved
379	398	if ($open_quote) {
380		- if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
	399	+ if ($e) {
	400	+ $e->send(E_ERROR, 'Lexer: Missing end quote');
	401	+ }
381	402	$value = substr($quoted_value, 1);
382	403	} else {
383	404	$value = $quoted_value;
384	405	}
385	406	}
386		- if ($value === false) $value = '';
	407	+ if ($value === false) {
	408	+ $value = '';
	409	+ }
387	410	return array($key => $this->parseData($value));
388	411	}
389	412
		@@ -415,7 +438,9 @@ discard block
		block discarded – undo
415	438	$key = substr($string, $key_begin, $key_end - $key_begin);
416	439
417	440	if (!$key) {
418		- if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
	441	+ if ($e) {
	442	+ $e->send(E_ERROR, 'Lexer: Missing attribute key');
	443	+ }
419	444	$cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
420	445	continue; // empty key
421	446	}
		@@ -467,7 +492,9 @@ discard block
		block discarded – undo
467	492	}
468	493
469	494	$value = substr($string, $value_begin, $value_end - $value_begin);
470		- if ($value === false) $value = '';
	495	+ if ($value === false) {
	496	+ $value = '';
	497	+ }
471	498	$array[$key] = $this->parseData($value);
472	499	$cursor++;
473	500
		@@ -477,7 +504,9 @@ discard block
		block discarded – undo
477	504	$array[$key] = $key;
478	505	} else {
479	506	// purely theoretical
480		- if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
	507	+ if ($e) {
	508	+ $e->send(E_ERROR, 'Lexer: Missing attribute key');
	509	+ }
481	510	}
482	511
483	512	}