GitHub Access Token became invalid

It seems that the GitHub access token used for retrieving details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Completed
Push — master ( b130b6...8a2f54 )
by gyeong-won
07:36
created
classes/security/htmlpurifier/library/HTMLPurifier/Encoder.php 4 patches
Doc Comments   +5 added lines patch added patch discarded remove patch
@@ -31,6 +31,9 @@  discard block
 block discarded – undo
31 31
 
32 32
     /**
33 33
      * iconv wrapper which mutes errors and works around bugs.
34
+     * @param string $in
35
+     * @param string $out
36
+     * @param string $text
34 37
      */
35 38
     public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
36 39
         $code = self::testIconvTruncateBug();
@@ -332,6 +335,7 @@  discard block
 block discarded – undo
332 335
 
333 336
     /**
334 337
      * Converts a string to UTF-8 based on configuration.
338
+     * @param HTMLPurifier_Context $context
335 339
      */
336 340
     public static function convertToUTF8($str, $config, $context) {
337 341
         $encoding = $config->get('Core.Encoding');
@@ -362,6 +366,7 @@  discard block
 block discarded – undo
362 366
      * Converts a string from UTF-8 based on configuration.
363 367
      * @note Currently, this is a lossy conversion, with unexpressable
364 368
      *       characters being omitted.
369
+     * @param HTMLPurifier_Context $context
365 370
      */
366 371
     public static function convertFromUTF8($str, $config, $context) {
367 372
         $encoding = $config->get('Core.Encoding');
Please login to merge, or discard this patch.
Indentation   +526 added lines, -526 removed lines patch added patch discarded remove patch
@@ -7,532 +7,532 @@
 block discarded – undo
7 7
 class HTMLPurifier_Encoder
8 8
 {
9 9
 
10
-    /**
11
-     * Constructor throws fatal error if you attempt to instantiate class
12
-     */
13
-    private function __construct() {
14
-        trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
15
-    }
16
-
17
-    /**
18
-     * Error-handler that mutes errors, alternative to shut-up operator.
19
-     */
20
-    public static function muteErrorHandler() {}
21
-
22
-    /**
23
-     * iconv wrapper which mutes errors, but doesn't work around bugs.
24
-     */
25
-    public static function unsafeIconv($in, $out, $text) {
26
-        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
27
-        $r = iconv($in, $out, $text);
28
-        restore_error_handler();
29
-        return $r;
30
-    }
31
-
32
-    /**
33
-     * iconv wrapper which mutes errors and works around bugs.
34
-     */
35
-    public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
36
-        $code = self::testIconvTruncateBug();
37
-        if ($code == self::ICONV_OK) {
38
-            return self::unsafeIconv($in, $out, $text);
39
-        } elseif ($code == self::ICONV_TRUNCATES) {
40
-            // we can only work around this if the input character set
41
-            // is utf-8
42
-            if ($in == 'utf-8') {
43
-                if ($max_chunk_size < 4) {
44
-                    trigger_error('max_chunk_size is too small', E_USER_WARNING);
45
-                    return false;
46
-                }
47
-                // split into 8000 byte chunks, but be careful to handle
48
-                // multibyte boundaries properly
49
-                if (($c = strlen($text)) <= $max_chunk_size) {
50
-                    return self::unsafeIconv($in, $out, $text);
51
-                }
52
-                $r = '';
53
-                $i = 0;
54
-                while (true) {
55
-                    if ($i + $max_chunk_size >= $c) {
56
-                        $r .= self::unsafeIconv($in, $out, substr($text, $i));
57
-                        break;
58
-                    }
59
-                    // wibble the boundary
60
-                    if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
61
-                        $chunk_size = $max_chunk_size;
62
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
63
-                        $chunk_size = $max_chunk_size - 1;
64
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
65
-                        $chunk_size = $max_chunk_size - 2;
66
-                    } elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
67
-                        $chunk_size = $max_chunk_size - 3;
68
-                    } else {
69
-                        return false; // rather confusing UTF-8...
70
-                    }
71
-                    $chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
72
-                    $r .= self::unsafeIconv($in, $out, $chunk);
73
-                    $i += $chunk_size;
74
-                }
75
-                return $r;
76
-            } else {
77
-                return false;
78
-            }
79
-        } else {
80
-            return false;
81
-        }
82
-    }
83
-
84
-    /**
85
-     * Cleans a UTF-8 string for well-formedness and SGML validity
86
-     *
87
-     * It will parse according to UTF-8 and return a valid UTF8 string, with
88
-     * non-SGML codepoints excluded.
89
-     *
90
-     * @note Just for reference, the non-SGML code points are 0 to 31 and
91
-     *       127 to 159, inclusive.  However, we allow code points 9, 10
92
-     *       and 13, which are the tab, line feed and carriage return
93
-     *       respectively. 128 and above the code points map to multibyte
94
-     *       UTF-8 representations.
95
-     *
96
-     * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
97
-     *       hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
98
-     *       LGPL license.  Notes on what changed are inside, but in general,
99
-     *       the original code transformed UTF-8 text into an array of integer
100
-     *       Unicode codepoints. Understandably, transforming that back to
101
-     *       a string would be somewhat expensive, so the function was modded to
102
-     *       directly operate on the string.  However, this discourages code
103
-     *       reuse, and the logic enumerated here would be useful for any
104
-     *       function that needs to be able to understand UTF-8 characters.
105
-     *       As of right now, only smart lossless character encoding converters
106
-     *       would need that, and I'm probably not going to implement them.
107
-     *       Once again, PHP 6 should solve all our problems.
108
-     */
109
-    public static function cleanUTF8($str, $force_php = false) {
110
-
111
-        // UTF-8 validity is checked since PHP 4.3.5
112
-        // This is an optimization: if the string is already valid UTF-8, no
113
-        // need to do PHP stuff. 99% of the time, this will be the case.
114
-        // The regexp matches the XML char production, as well as well as excluding
115
-        // non-SGML codepoints U+007F to U+009F
116
-        if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
117
-            return $str;
118
-        }
119
-
120
-        $mState = 0; // cached expected number of octets after the current octet
121
-                     // until the beginning of the next UTF8 character sequence
122
-        $mUcs4  = 0; // cached Unicode character
123
-        $mBytes = 1; // cached expected number of octets in the current sequence
124
-
125
-        // original code involved an $out that was an array of Unicode
126
-        // codepoints.  Instead of having to convert back into UTF-8, we've
127
-        // decided to directly append valid UTF-8 characters onto a string
128
-        // $out once they're done.  $char accumulates raw bytes, while $mUcs4
129
-        // turns into the Unicode code point, so there's some redundancy.
130
-
131
-        $out = '';
132
-        $char = '';
133
-
134
-        $len = strlen($str);
135
-        for($i = 0; $i < $len; $i++) {
136
-            $in = ord($str{$i});
137
-            $char .= $str[$i]; // append byte to char
138
-            if (0 == $mState) {
139
-                // When mState is zero we expect either a US-ASCII character
140
-                // or a multi-octet sequence.
141
-                if (0 == (0x80 & ($in))) {
142
-                    // US-ASCII, pass straight through.
143
-                    if (($in <= 31 || $in == 127) &&
144
-                        !($in == 9 || $in == 13 || $in == 10) // save \r\t\n
145
-                    ) {
146
-                        // control characters, remove
147
-                    } else {
148
-                        $out .= $char;
149
-                    }
150
-                    // reset
151
-                    $char = '';
152
-                    $mBytes = 1;
153
-                } elseif (0xC0 == (0xE0 & ($in))) {
154
-                    // First octet of 2 octet sequence
155
-                    $mUcs4 = ($in);
156
-                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
157
-                    $mState = 1;
158
-                    $mBytes = 2;
159
-                } elseif (0xE0 == (0xF0 & ($in))) {
160
-                    // First octet of 3 octet sequence
161
-                    $mUcs4 = ($in);
162
-                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
163
-                    $mState = 2;
164
-                    $mBytes = 3;
165
-                } elseif (0xF0 == (0xF8 & ($in))) {
166
-                    // First octet of 4 octet sequence
167
-                    $mUcs4 = ($in);
168
-                    $mUcs4 = ($mUcs4 & 0x07) << 18;
169
-                    $mState = 3;
170
-                    $mBytes = 4;
171
-                } elseif (0xF8 == (0xFC & ($in))) {
172
-                    // First octet of 5 octet sequence.
173
-                    //
174
-                    // This is illegal because the encoded codepoint must be
175
-                    // either:
176
-                    // (a) not the shortest form or
177
-                    // (b) outside the Unicode range of 0-0x10FFFF.
178
-                    // Rather than trying to resynchronize, we will carry on
179
-                    // until the end of the sequence and let the later error
180
-                    // handling code catch it.
181
-                    $mUcs4 = ($in);
182
-                    $mUcs4 = ($mUcs4 & 0x03) << 24;
183
-                    $mState = 4;
184
-                    $mBytes = 5;
185
-                } elseif (0xFC == (0xFE & ($in))) {
186
-                    // First octet of 6 octet sequence, see comments for 5
187
-                    // octet sequence.
188
-                    $mUcs4 = ($in);
189
-                    $mUcs4 = ($mUcs4 & 1) << 30;
190
-                    $mState = 5;
191
-                    $mBytes = 6;
192
-                } else {
193
-                    // Current octet is neither in the US-ASCII range nor a
194
-                    // legal first octet of a multi-octet sequence.
195
-                    $mState = 0;
196
-                    $mUcs4  = 0;
197
-                    $mBytes = 1;
198
-                    $char = '';
199
-                }
200
-            } else {
201
-                // When mState is non-zero, we expect a continuation of the
202
-                // multi-octet sequence
203
-                if (0x80 == (0xC0 & ($in))) {
204
-                    // Legal continuation.
205
-                    $shift = ($mState - 1) * 6;
206
-                    $tmp = $in;
207
-                    $tmp = ($tmp & 0x0000003F) << $shift;
208
-                    $mUcs4 |= $tmp;
209
-
210
-                    if (0 == --$mState) {
211
-                        // End of the multi-octet sequence. mUcs4 now contains
212
-                        // the final Unicode codepoint to be output
213
-
214
-                        // Check for illegal sequences and codepoints.
215
-
216
-                        // From Unicode 3.1, non-shortest form is illegal
217
-                        if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
218
-                            ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
219
-                            ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
220
-                            (4 < $mBytes) ||
221
-                            // From Unicode 3.2, surrogate characters = illegal
222
-                            (($mUcs4 & 0xFFFFF800) == 0xD800) ||
223
-                            // Codepoints outside the Unicode range are illegal
224
-                            ($mUcs4 > 0x10FFFF)
225
-                        ) {
226
-
227
-                        } elseif (0xFEFF != $mUcs4 && // omit BOM
228
-                            // check for valid Char unicode codepoints
229
-                            (
230
-                                0x9 == $mUcs4 ||
231
-                                0xA == $mUcs4 ||
232
-                                0xD == $mUcs4 ||
233
-                                (0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
234
-                                // 7F-9F is not strictly prohibited by XML,
235
-                                // but it is non-SGML, and thus we don't allow it
236
-                                (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
237
-                                (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
238
-                            )
239
-                        ) {
240
-                            $out .= $char;
241
-                        }
242
-                        // initialize UTF8 cache (reset)
243
-                        $mState = 0;
244
-                        $mUcs4  = 0;
245
-                        $mBytes = 1;
246
-                        $char = '';
247
-                    }
248
-                } else {
249
-                    // ((0xC0 & (*in) != 0x80) && (mState != 0))
250
-                    // Incomplete multi-octet sequence.
251
-                    // used to result in complete fail, but we'll reset
252
-                    $mState = 0;
253
-                    $mUcs4  = 0;
254
-                    $mBytes = 1;
255
-                    $char ='';
256
-                }
257
-            }
258
-        }
259
-        return $out;
260
-    }
261
-
262
-    /**
263
-     * Translates a Unicode codepoint into its corresponding UTF-8 character.
264
-     * @note Based on Feyd's function at
265
-     *       <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
266
-     *       which is in public domain.
267
-     * @note While we're going to do code point parsing anyway, a good
268
-     *       optimization would be to refuse to translate code points that
269
-     *       are non-SGML characters.  However, this could lead to duplication.
270
-     * @note This is very similar to the unichr function in
271
-     *       maintenance/generate-entity-file.php (although this is superior,
272
-     *       due to its sanity checks).
273
-     */
274
-
275
-    // +----------+----------+----------+----------+
276
-    // | 33222222 | 22221111 | 111111   |          |
277
-    // | 10987654 | 32109876 | 54321098 | 76543210 | bit
278
-    // +----------+----------+----------+----------+
279
-    // |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
280
-    // |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
281
-    // |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
282
-    // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
283
-    // +----------+----------+----------+----------+
284
-    // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
285
-    // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
286
-    // +----------+----------+----------+----------+
287
-
288
-    public static function unichr($code) {
289
-        if($code > 1114111 or $code < 0 or
290
-          ($code >= 55296 and $code <= 57343) ) {
291
-            // bits are set outside the "valid" range as defined
292
-            // by UNICODE 4.1.0
293
-            return '';
294
-        }
295
-
296
-        $x = $y = $z = $w = 0;
297
-        if ($code < 128) {
298
-            // regular ASCII character
299
-            $x = $code;
300
-        } else {
301
-            // set up bits for UTF-8
302
-            $x = ($code & 63) | 128;
303
-            if ($code < 2048) {
304
-                $y = (($code & 2047) >> 6) | 192;
305
-            } else {
306
-                $y = (($code & 4032) >> 6) | 128;
307
-                if($code < 65536) {
308
-                    $z = (($code >> 12) & 15) | 224;
309
-                } else {
310
-                    $z = (($code >> 12) & 63) | 128;
311
-                    $w = (($code >> 18) & 7)  | 240;
312
-                }
313
-            }
314
-        }
315
-        // set up the actual character
316
-        $ret = '';
317
-        if($w) $ret .= chr($w);
318
-        if($z) $ret .= chr($z);
319
-        if($y) $ret .= chr($y);
320
-        $ret .= chr($x);
321
-
322
-        return $ret;
323
-    }
324
-
325
-    public static function iconvAvailable() {
326
-        static $iconv = null;
327
-        if ($iconv === null) {
328
-            $iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
329
-        }
330
-        return $iconv;
331
-    }
332
-
333
-    /**
334
-     * Converts a string to UTF-8 based on configuration.
335
-     */
336
-    public static function convertToUTF8($str, $config, $context) {
337
-        $encoding = $config->get('Core.Encoding');
338
-        if ($encoding === 'utf-8') return $str;
339
-        static $iconv = null;
340
-        if ($iconv === null) $iconv = self::iconvAvailable();
341
-        if ($iconv && !$config->get('Test.ForceNoIconv')) {
342
-            // unaffected by bugs, since UTF-8 support all characters
343
-            $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
344
-            if ($str === false) {
345
-                // $encoding is not a valid encoding
346
-                trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
347
-                return '';
348
-            }
349
-            // If the string is bjorked by Shift_JIS or a similar encoding
350
-            // that doesn't support all of ASCII, convert the naughty
351
-            // characters to their true byte-wise ASCII/UTF-8 equivalents.
352
-            $str = strtr($str, self::testEncodingSupportsASCII($encoding));
353
-            return $str;
354
-        } elseif ($encoding === 'iso-8859-1') {
355
-            $str = utf8_encode($str);
356
-            return $str;
357
-        }
358
-        trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
359
-    }
360
-
361
-    /**
362
-     * Converts a string from UTF-8 based on configuration.
363
-     * @note Currently, this is a lossy conversion, with unexpressable
364
-     *       characters being omitted.
365
-     */
366
-    public static function convertFromUTF8($str, $config, $context) {
367
-        $encoding = $config->get('Core.Encoding');
368
-        if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
369
-            $str = self::convertToASCIIDumbLossless($str);
370
-        }
371
-        if ($encoding === 'utf-8') return $str;
372
-        static $iconv = null;
373
-        if ($iconv === null) $iconv = self::iconvAvailable();
374
-        if ($iconv && !$config->get('Test.ForceNoIconv')) {
375
-            // Undo our previous fix in convertToUTF8, otherwise iconv will barf
376
-            $ascii_fix = self::testEncodingSupportsASCII($encoding);
377
-            if (!$escape && !empty($ascii_fix)) {
378
-                $clear_fix = array();
379
-                foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
380
-                $str = strtr($str, $clear_fix);
381
-            }
382
-            $str = strtr($str, array_flip($ascii_fix));
383
-            // Normal stuff
384
-            $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
385
-            return $str;
386
-        } elseif ($encoding === 'iso-8859-1') {
387
-            $str = utf8_decode($str);
388
-            return $str;
389
-        }
390
-        trigger_error('Encoding not supported', E_USER_ERROR);
391
-        // You might be tempted to assume that the ASCII representation
392
-        // might be OK, however, this is *not* universally true over all
393
-        // encodings.  So we take the conservative route here, rather
394
-        // than forcibly turn on %Core.EscapeNonASCIICharacters
395
-    }
396
-
397
-    /**
398
-     * Lossless (character-wise) conversion of HTML to ASCII
399
-     * @param $str UTF-8 string to be converted to ASCII
400
-     * @returns ASCII encoded string with non-ASCII character entity-ized
401
-     * @warning Adapted from MediaWiki, claiming fair use: this is a common
402
-     *       algorithm. If you disagree with this license fudgery,
403
-     *       implement it yourself.
404
-     * @note Uses decimal numeric entities since they are best supported.
405
-     * @note This is a DUMB function: it has no concept of keeping
406
-     *       character entities that the projected character encoding
407
-     *       can allow. We could possibly implement a smart version
408
-     *       but that would require it to also know which Unicode
409
-     *       codepoints the charset supported (not an easy task).
410
-     * @note Sort of with cleanUTF8() but it assumes that $str is
411
-     *       well-formed UTF-8
412
-     */
413
-    public static function convertToASCIIDumbLossless($str) {
414
-        $bytesleft = 0;
415
-        $result = '';
416
-        $working = 0;
417
-        $len = strlen($str);
418
-        for( $i = 0; $i < $len; $i++ ) {
419
-            $bytevalue = ord( $str[$i] );
420
-            if( $bytevalue <= 0x7F ) { //0xxx xxxx
421
-                $result .= chr( $bytevalue );
422
-                $bytesleft = 0;
423
-            } elseif( $bytevalue <= 0xBF ) { //10xx xxxx
424
-                $working = $working << 6;
425
-                $working += ($bytevalue & 0x3F);
426
-                $bytesleft--;
427
-                if( $bytesleft <= 0 ) {
428
-                    $result .= "&#" . $working . ";";
429
-                }
430
-            } elseif( $bytevalue <= 0xDF ) { //110x xxxx
431
-                $working = $bytevalue & 0x1F;
432
-                $bytesleft = 1;
433
-            } elseif( $bytevalue <= 0xEF ) { //1110 xxxx
434
-                $working = $bytevalue & 0x0F;
435
-                $bytesleft = 2;
436
-            } else { //1111 0xxx
437
-                $working = $bytevalue & 0x07;
438
-                $bytesleft = 3;
439
-            }
440
-        }
441
-        return $result;
442
-    }
443
-
444
-    /** No bugs detected in iconv. */
445
-    const ICONV_OK = 0;
446
-
447
-    /** Iconv truncates output if converting from UTF-8 to another
448
-     *  character set with //IGNORE, and a non-encodable character is found */
449
-    const ICONV_TRUNCATES = 1;
450
-
451
-    /** Iconv does not support //IGNORE, making it unusable for
452
-     *  transcoding purposes */
453
-    const ICONV_UNUSABLE = 2;
454
-
455
-    /**
456
-     * glibc iconv has a known bug where it doesn't handle the magic
457
-     * //IGNORE stanza correctly.  In particular, rather than ignore
458
-     * characters, it will return an EILSEQ after consuming some number
459
-     * of characters, and expect you to restart iconv as if it were
460
-     * an E2BIG.  Old versions of PHP did not respect the errno, and
461
-     * returned the fragment, so as a result you would see iconv
462
-     * mysteriously truncating output. We can work around this by
463
-     * manually chopping our input into segments of about 8000
464
-     * characters, as long as PHP ignores the error code.  If PHP starts
465
-     * paying attention to the error code, iconv becomes unusable.
466
-     *
467
-     * @returns Error code indicating severity of bug.
468
-     */
469
-    public static function testIconvTruncateBug() {
470
-        static $code = null;
471
-        if ($code === null) {
472
-            // better not use iconv, otherwise infinite loop!
473
-            $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
474
-            if ($r === false) {
475
-                $code = self::ICONV_UNUSABLE;
476
-            } elseif (($c = strlen($r)) < 9000) {
477
-                $code = self::ICONV_TRUNCATES;
478
-            } elseif ($c > 9000) {
479
-                trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
480
-            } else {
481
-                $code = self::ICONV_OK;
482
-            }
483
-        }
484
-        return $code;
485
-    }
486
-
487
-    /**
488
-     * This expensive function tests whether or not a given character
489
-     * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
490
-     * fail this test, and require special processing. Variable width
491
-     * encodings shouldn't ever fail.
492
-     *
493
-     * @param string $encoding Encoding name to test, as per iconv format
494
-     * @param bool $bypass Whether or not to bypass the precompiled arrays.
495
-     * @return Array of UTF-8 characters to their corresponding ASCII,
496
-     *      which can be used to "undo" any overzealous iconv action.
497
-     */
498
-    public static function testEncodingSupportsASCII($encoding, $bypass = false) {
499
-        // All calls to iconv here are unsafe, proof by case analysis:
500
-        // If ICONV_OK, no difference.
501
-        // If ICONV_TRUNCATE, all calls involve one character inputs,
502
-        // so bug is not triggered.
503
-        // If ICONV_UNUSABLE, this call is irrelevant
504
-        static $encodings = array();
505
-        if (!$bypass) {
506
-            if (isset($encodings[$encoding])) return $encodings[$encoding];
507
-            $lenc = strtolower($encoding);
508
-            switch ($lenc) {
509
-                case 'shift_jis':
510
-                    return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
511
-                case 'johab':
512
-                    return array("\xE2\x82\xA9" => '\\');
513
-            }
514
-            if (strpos($lenc, 'iso-8859-') === 0) return array();
515
-        }
516
-        $ret = array();
517
-        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
518
-        for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
519
-            $c = chr($i); // UTF-8 char
520
-            $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
521
-            if (
522
-                $r === '' ||
523
-                // This line is needed for iconv implementations that do not
524
-                // omit characters that do not exist in the target character set
525
-                ($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
526
-            ) {
527
-                // Reverse engineer: what's the UTF-8 equiv of this byte
528
-                // sequence? This assumes that there's no variable width
529
-                // encoding that doesn't support ASCII.
530
-                $ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
531
-            }
532
-        }
533
-        $encodings[$encoding] = $ret;
534
-        return $ret;
535
-    }
10
+	/**
11
+	 * Constructor throws fatal error if you attempt to instantiate class
12
+	 */
13
+	private function __construct() {
14
+		trigger_error('Cannot instantiate encoder, call methods statically', E_USER_ERROR);
15
+	}
16
+
17
+	/**
18
+	 * Error-handler that mutes errors, alternative to shut-up operator.
19
+	 */
20
+	public static function muteErrorHandler() {}
21
+
22
+	/**
23
+	 * iconv wrapper which mutes errors, but doesn't work around bugs.
24
+	 */
25
+	public static function unsafeIconv($in, $out, $text) {
26
+		set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
27
+		$r = iconv($in, $out, $text);
28
+		restore_error_handler();
29
+		return $r;
30
+	}
31
+
32
+	/**
33
+	 * iconv wrapper which mutes errors and works around bugs.
34
+	 */
35
+	public static function iconv($in, $out, $text, $max_chunk_size = 8000) {
36
+		$code = self::testIconvTruncateBug();
37
+		if ($code == self::ICONV_OK) {
38
+			return self::unsafeIconv($in, $out, $text);
39
+		} elseif ($code == self::ICONV_TRUNCATES) {
40
+			// we can only work around this if the input character set
41
+			// is utf-8
42
+			if ($in == 'utf-8') {
43
+				if ($max_chunk_size < 4) {
44
+					trigger_error('max_chunk_size is too small', E_USER_WARNING);
45
+					return false;
46
+				}
47
+				// split into 8000 byte chunks, but be careful to handle
48
+				// multibyte boundaries properly
49
+				if (($c = strlen($text)) <= $max_chunk_size) {
50
+					return self::unsafeIconv($in, $out, $text);
51
+				}
52
+				$r = '';
53
+				$i = 0;
54
+				while (true) {
55
+					if ($i + $max_chunk_size >= $c) {
56
+						$r .= self::unsafeIconv($in, $out, substr($text, $i));
57
+						break;
58
+					}
59
+					// wibble the boundary
60
+					if (0x80 != (0xC0 & ord($text[$i + $max_chunk_size]))) {
61
+						$chunk_size = $max_chunk_size;
62
+					} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 1]))) {
63
+						$chunk_size = $max_chunk_size - 1;
64
+					} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 2]))) {
65
+						$chunk_size = $max_chunk_size - 2;
66
+					} elseif (0x80 != (0xC0 & ord($text[$i + $max_chunk_size - 3]))) {
67
+						$chunk_size = $max_chunk_size - 3;
68
+					} else {
69
+						return false; // rather confusing UTF-8...
70
+					}
71
+					$chunk = substr($text, $i, $chunk_size); // substr doesn't mind overlong lengths
72
+					$r .= self::unsafeIconv($in, $out, $chunk);
73
+					$i += $chunk_size;
74
+				}
75
+				return $r;
76
+			} else {
77
+				return false;
78
+			}
79
+		} else {
80
+			return false;
81
+		}
82
+	}
83
+
84
+	/**
85
+	 * Cleans a UTF-8 string for well-formedness and SGML validity
86
+	 *
87
+	 * It will parse according to UTF-8 and return a valid UTF8 string, with
88
+	 * non-SGML codepoints excluded.
89
+	 *
90
+	 * @note Just for reference, the non-SGML code points are 0 to 31 and
91
+	 *       127 to 159, inclusive.  However, we allow code points 9, 10
92
+	 *       and 13, which are the tab, line feed and carriage return
93
+	 *       respectively. 128 and above the code points map to multibyte
94
+	 *       UTF-8 representations.
95
+	 *
96
+	 * @note Fallback code adapted from utf8ToUnicode by Henri Sivonen and
97
+	 *       hsivonen@iki.fi at <http://iki.fi/hsivonen/php-utf8/> under the
98
+	 *       LGPL license.  Notes on what changed are inside, but in general,
99
+	 *       the original code transformed UTF-8 text into an array of integer
100
+	 *       Unicode codepoints. Understandably, transforming that back to
101
+	 *       a string would be somewhat expensive, so the function was modded to
102
+	 *       directly operate on the string.  However, this discourages code
103
+	 *       reuse, and the logic enumerated here would be useful for any
104
+	 *       function that needs to be able to understand UTF-8 characters.
105
+	 *       As of right now, only smart lossless character encoding converters
106
+	 *       would need that, and I'm probably not going to implement them.
107
+	 *       Once again, PHP 6 should solve all our problems.
108
+	 */
109
+	public static function cleanUTF8($str, $force_php = false) {
110
+
111
+		// UTF-8 validity is checked since PHP 4.3.5
112
+		// This is an optimization: if the string is already valid UTF-8, no
113
+		// need to do PHP stuff. 99% of the time, this will be the case.
114
+		// The regexp matches the XML char production, as well as excluding
115
+		// non-SGML codepoints U+007F to U+009F
116
+		if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) {
117
+			return $str;
118
+		}
119
+
120
+		$mState = 0; // cached expected number of octets after the current octet
121
+					 // until the beginning of the next UTF8 character sequence
122
+		$mUcs4  = 0; // cached Unicode character
123
+		$mBytes = 1; // cached expected number of octets in the current sequence
124
+
125
+		// original code involved an $out that was an array of Unicode
126
+		// codepoints.  Instead of having to convert back into UTF-8, we've
127
+		// decided to directly append valid UTF-8 characters onto a string
128
+		// $out once they're done.  $char accumulates raw bytes, while $mUcs4
129
+		// turns into the Unicode code point, so there's some redundancy.
130
+
131
+		$out = '';
132
+		$char = '';
133
+
134
+		$len = strlen($str);
135
+		for($i = 0; $i < $len; $i++) {
136
+			$in = ord($str{$i});
137
+			$char .= $str[$i]; // append byte to char
138
+			if (0 == $mState) {
139
+				// When mState is zero we expect either a US-ASCII character
140
+				// or a multi-octet sequence.
141
+				if (0 == (0x80 & ($in))) {
142
+					// US-ASCII, pass straight through.
143
+					if (($in <= 31 || $in == 127) &&
144
+						!($in == 9 || $in == 13 || $in == 10) // save \r\t\n
145
+					) {
146
+						// control characters, remove
147
+					} else {
148
+						$out .= $char;
149
+					}
150
+					// reset
151
+					$char = '';
152
+					$mBytes = 1;
153
+				} elseif (0xC0 == (0xE0 & ($in))) {
154
+					// First octet of 2 octet sequence
155
+					$mUcs4 = ($in);
156
+					$mUcs4 = ($mUcs4 & 0x1F) << 6;
157
+					$mState = 1;
158
+					$mBytes = 2;
159
+				} elseif (0xE0 == (0xF0 & ($in))) {
160
+					// First octet of 3 octet sequence
161
+					$mUcs4 = ($in);
162
+					$mUcs4 = ($mUcs4 & 0x0F) << 12;
163
+					$mState = 2;
164
+					$mBytes = 3;
165
+				} elseif (0xF0 == (0xF8 & ($in))) {
166
+					// First octet of 4 octet sequence
167
+					$mUcs4 = ($in);
168
+					$mUcs4 = ($mUcs4 & 0x07) << 18;
169
+					$mState = 3;
170
+					$mBytes = 4;
171
+				} elseif (0xF8 == (0xFC & ($in))) {
172
+					// First octet of 5 octet sequence.
173
+					//
174
+					// This is illegal because the encoded codepoint must be
175
+					// either:
176
+					// (a) not the shortest form or
177
+					// (b) outside the Unicode range of 0-0x10FFFF.
178
+					// Rather than trying to resynchronize, we will carry on
179
+					// until the end of the sequence and let the later error
180
+					// handling code catch it.
181
+					$mUcs4 = ($in);
182
+					$mUcs4 = ($mUcs4 & 0x03) << 24;
183
+					$mState = 4;
184
+					$mBytes = 5;
185
+				} elseif (0xFC == (0xFE & ($in))) {
186
+					// First octet of 6 octet sequence, see comments for 5
187
+					// octet sequence.
188
+					$mUcs4 = ($in);
189
+					$mUcs4 = ($mUcs4 & 1) << 30;
190
+					$mState = 5;
191
+					$mBytes = 6;
192
+				} else {
193
+					// Current octet is neither in the US-ASCII range nor a
194
+					// legal first octet of a multi-octet sequence.
195
+					$mState = 0;
196
+					$mUcs4  = 0;
197
+					$mBytes = 1;
198
+					$char = '';
199
+				}
200
+			} else {
201
+				// When mState is non-zero, we expect a continuation of the
202
+				// multi-octet sequence
203
+				if (0x80 == (0xC0 & ($in))) {
204
+					// Legal continuation.
205
+					$shift = ($mState - 1) * 6;
206
+					$tmp = $in;
207
+					$tmp = ($tmp & 0x0000003F) << $shift;
208
+					$mUcs4 |= $tmp;
209
+
210
+					if (0 == --$mState) {
211
+						// End of the multi-octet sequence. mUcs4 now contains
212
+						// the final Unicode codepoint to be output
213
+
214
+						// Check for illegal sequences and codepoints.
215
+
216
+						// From Unicode 3.1, non-shortest form is illegal
217
+						if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
218
+							((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
219
+							((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
220
+							(4 < $mBytes) ||
221
+							// From Unicode 3.2, surrogate characters = illegal
222
+							(($mUcs4 & 0xFFFFF800) == 0xD800) ||
223
+							// Codepoints outside the Unicode range are illegal
224
+							($mUcs4 > 0x10FFFF)
225
+						) {
226
+
227
+						} elseif (0xFEFF != $mUcs4 && // omit BOM
228
+							// check for valid Char unicode codepoints
229
+							(
230
+								0x9 == $mUcs4 ||
231
+								0xA == $mUcs4 ||
232
+								0xD == $mUcs4 ||
233
+								(0x20 <= $mUcs4 && 0x7E >= $mUcs4) ||
234
+								// 7F-9F is not strictly prohibited by XML,
235
+								// but it is non-SGML, and thus we don't allow it
236
+								(0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) ||
237
+								(0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4)
238
+							)
239
+						) {
240
+							$out .= $char;
241
+						}
242
+						// initialize UTF8 cache (reset)
243
+						$mState = 0;
244
+						$mUcs4  = 0;
245
+						$mBytes = 1;
246
+						$char = '';
247
+					}
248
+				} else {
249
+					// ((0xC0 & (*in) != 0x80) && (mState != 0))
250
+					// Incomplete multi-octet sequence.
251
+					// used to result in complete fail, but we'll reset
252
+					$mState = 0;
253
+					$mUcs4  = 0;
254
+					$mBytes = 1;
255
+					$char ='';
256
+				}
257
+			}
258
+		}
259
+		return $out;
260
+	}
261
+
262
+	/**
263
+	 * Translates a Unicode codepoint into its corresponding UTF-8 character.
264
+	 * @note Based on Feyd's function at
265
+	 *       <http://forums.devnetwork.net/viewtopic.php?p=191404#191404>,
266
+	 *       which is in public domain.
267
+	 * @note While we're going to do code point parsing anyway, a good
268
+	 *       optimization would be to refuse to translate code points that
269
+	 *       are non-SGML characters.  However, this could lead to duplication.
270
+	 * @note This is very similar to the unichr function in
271
+	 *       maintenance/generate-entity-file.php (although this is superior,
272
+	 *       due to its sanity checks).
273
+	 */
274
+
275
+	// +----------+----------+----------+----------+
276
+	// | 33222222 | 22221111 | 111111   |          |
277
+	// | 10987654 | 32109876 | 54321098 | 76543210 | bit
278
+	// +----------+----------+----------+----------+
279
+	// |          |          |          | 0xxxxxxx | 1 byte 0x00000000..0x0000007F
280
+	// |          |          | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF
281
+	// |          | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF
282
+	// | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF
283
+	// +----------+----------+----------+----------+
284
+	// | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF)
285
+	// | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes
286
+	// +----------+----------+----------+----------+
287
+
288
+	public static function unichr($code) {
289
+		if($code > 1114111 or $code < 0 or
290
+		  ($code >= 55296 and $code <= 57343) ) {
291
+			// bits are set outside the "valid" range as defined
292
+			// by UNICODE 4.1.0
293
+			return '';
294
+		}
295
+
296
+		$x = $y = $z = $w = 0;
297
+		if ($code < 128) {
298
+			// regular ASCII character
299
+			$x = $code;
300
+		} else {
301
+			// set up bits for UTF-8
302
+			$x = ($code & 63) | 128;
303
+			if ($code < 2048) {
304
+				$y = (($code & 2047) >> 6) | 192;
305
+			} else {
306
+				$y = (($code & 4032) >> 6) | 128;
307
+				if($code < 65536) {
308
+					$z = (($code >> 12) & 15) | 224;
309
+				} else {
310
+					$z = (($code >> 12) & 63) | 128;
311
+					$w = (($code >> 18) & 7)  | 240;
312
+				}
313
+			}
314
+		}
315
+		// set up the actual character
316
+		$ret = '';
317
+		if($w) $ret .= chr($w);
318
+		if($z) $ret .= chr($z);
319
+		if($y) $ret .= chr($y);
320
+		$ret .= chr($x);
321
+
322
+		return $ret;
323
+	}
324
+
325
+	public static function iconvAvailable() {
326
+		static $iconv = null;
327
+		if ($iconv === null) {
328
+			$iconv = function_exists('iconv') && self::testIconvTruncateBug() != self::ICONV_UNUSABLE;
329
+		}
330
+		return $iconv;
331
+	}
332
+
333
+	/**
334
+	 * Converts a string to UTF-8 based on configuration.
335
+	 */
336
+	public static function convertToUTF8($str, $config, $context) {
337
+		$encoding = $config->get('Core.Encoding');
338
+		if ($encoding === 'utf-8') return $str;
339
+		static $iconv = null;
340
+		if ($iconv === null) $iconv = self::iconvAvailable();
341
+		if ($iconv && !$config->get('Test.ForceNoIconv')) {
342
+			// unaffected by bugs, since UTF-8 support all characters
343
+			$str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
344
+			if ($str === false) {
345
+				// $encoding is not a valid encoding
346
+				trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
347
+				return '';
348
+			}
349
+			// If the string is bjorked by Shift_JIS or a similar encoding
350
+			// that doesn't support all of ASCII, convert the naughty
351
+			// characters to their true byte-wise ASCII/UTF-8 equivalents.
352
+			$str = strtr($str, self::testEncodingSupportsASCII($encoding));
353
+			return $str;
354
+		} elseif ($encoding === 'iso-8859-1') {
355
+			$str = utf8_encode($str);
356
+			return $str;
357
+		}
358
+		trigger_error('Encoding not supported, please install iconv', E_USER_ERROR);
359
+	}
360
+
361
+	/**
362
+	 * Converts a string from UTF-8 based on configuration.
363
+	 * @note Currently, this is a lossy conversion, with inexpressible
364
+	 *       characters being omitted.
365
+	 */
366
+	public static function convertFromUTF8($str, $config, $context) {
367
+		$encoding = $config->get('Core.Encoding');
368
+		if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
369
+			$str = self::convertToASCIIDumbLossless($str);
370
+		}
371
+		if ($encoding === 'utf-8') return $str;
372
+		static $iconv = null;
373
+		if ($iconv === null) $iconv = self::iconvAvailable();
374
+		if ($iconv && !$config->get('Test.ForceNoIconv')) {
375
+			// Undo our previous fix in convertToUTF8, otherwise iconv will barf
376
+			$ascii_fix = self::testEncodingSupportsASCII($encoding);
377
+			if (!$escape && !empty($ascii_fix)) {
378
+				$clear_fix = array();
379
+				foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
380
+				$str = strtr($str, $clear_fix);
381
+			}
382
+			$str = strtr($str, array_flip($ascii_fix));
383
+			// Normal stuff
384
+			$str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
385
+			return $str;
386
+		} elseif ($encoding === 'iso-8859-1') {
387
+			$str = utf8_decode($str);
388
+			return $str;
389
+		}
390
+		trigger_error('Encoding not supported', E_USER_ERROR);
391
+		// You might be tempted to assume that the ASCII representation
392
+		// might be OK, however, this is *not* universally true over all
393
+		// encodings.  So we take the conservative route here, rather
394
+		// than forcibly turn on %Core.EscapeNonASCIICharacters
395
+	}
396
+
397
+	/**
398
+	 * Lossless (character-wise) conversion of HTML to ASCII
399
+	 * @param $str UTF-8 string to be converted to ASCII
400
+	 * @returns ASCII encoded string with non-ASCII characters entity-ized
401
+	 * @warning Adapted from MediaWiki, claiming fair use: this is a common
402
+	 *       algorithm. If you disagree with this license fudgery,
403
+	 *       implement it yourself.
404
+	 * @note Uses decimal numeric entities since they are best supported.
405
+	 * @note This is a DUMB function: it has no concept of keeping
406
+	 *       character entities that the projected character encoding
407
+	 *       can allow. We could possibly implement a smart version
408
+	 *       but that would require it to also know which Unicode
409
+	 *       codepoints the charset supported (not an easy task).
410
+	 * @note Similar to cleanUTF8(), but it assumes that $str is
411
+	 *       well-formed UTF-8
412
+	 */
413
+	public static function convertToASCIIDumbLossless($str) {
414
+		$bytesleft = 0;
415
+		$result = '';
416
+		$working = 0;
417
+		$len = strlen($str);
418
+		for( $i = 0; $i < $len; $i++ ) {
419
+			$bytevalue = ord( $str[$i] );
420
+			if( $bytevalue <= 0x7F ) { //0xxx xxxx
421
+				$result .= chr( $bytevalue );
422
+				$bytesleft = 0;
423
+			} elseif( $bytevalue <= 0xBF ) { //10xx xxxx
424
+				$working = $working << 6;
425
+				$working += ($bytevalue & 0x3F);
426
+				$bytesleft--;
427
+				if( $bytesleft <= 0 ) {
428
+					$result .= "&#" . $working . ";";
429
+				}
430
+			} elseif( $bytevalue <= 0xDF ) { //110x xxxx
431
+				$working = $bytevalue & 0x1F;
432
+				$bytesleft = 1;
433
+			} elseif( $bytevalue <= 0xEF ) { //1110 xxxx
434
+				$working = $bytevalue & 0x0F;
435
+				$bytesleft = 2;
436
+			} else { //1111 0xxx
437
+				$working = $bytevalue & 0x07;
438
+				$bytesleft = 3;
439
+			}
440
+		}
441
+		return $result;
442
+	}
443
+
444
+	/** No bugs detected in iconv. */
445
+	const ICONV_OK = 0;
446
+
447
+	/** Iconv truncates output if converting from UTF-8 to another
448
+	 *  character set with //IGNORE, and a non-encodable character is found */
449
+	const ICONV_TRUNCATES = 1;
450
+
451
+	/** Iconv does not support //IGNORE, making it unusable for
452
+	 *  transcoding purposes */
453
+	const ICONV_UNUSABLE = 2;
454
+
455
+	/**
456
+	 * glibc iconv has a known bug where it doesn't handle the magic
457
+	 * //IGNORE stanza correctly.  In particular, rather than ignore
458
+	 * characters, it will return an EILSEQ after consuming some number
459
+	 * of characters, and expect you to restart iconv as if it were
460
+	 * an E2BIG.  Old versions of PHP did not respect the errno, and
461
+	 * returned the fragment, so as a result you would see iconv
462
+	 * mysteriously truncating output. We can work around this by
463
+	 * manually chopping our input into segments of about 8000
464
+	 * characters, as long as PHP ignores the error code.  If PHP starts
465
+	 * paying attention to the error code, iconv becomes unusable.
466
+	 *
467
+	 * @returns Error code indicating severity of bug.
468
+	 */
469
+	public static function testIconvTruncateBug() {
470
+		static $code = null;
471
+		if ($code === null) {
472
+			// better not use iconv, otherwise infinite loop!
473
+			$r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
474
+			if ($r === false) {
475
+				$code = self::ICONV_UNUSABLE;
476
+			} elseif (($c = strlen($r)) < 9000) {
477
+				$code = self::ICONV_TRUNCATES;
478
+			} elseif ($c > 9000) {
479
+				trigger_error('Your copy of iconv is extremely buggy. Please notify HTML Purifier maintainers: include your iconv version as per phpversion()', E_USER_ERROR);
480
+			} else {
481
+				$code = self::ICONV_OK;
482
+			}
483
+		}
484
+		return $code;
485
+	}
486
+
487
+	/**
488
+	 * This expensive function tests whether or not a given character
489
+	 * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will
490
+	 * fail this test, and require special processing. Variable width
491
+	 * encodings shouldn't ever fail.
492
+	 *
493
+	 * @param string $encoding Encoding name to test, as per iconv format
494
+	 * @param bool $bypass Whether or not to bypass the precompiled arrays.
495
+	 * @return Array of UTF-8 characters to their corresponding ASCII,
496
+	 *      which can be used to "undo" any overzealous iconv action.
497
+	 */
498
+	public static function testEncodingSupportsASCII($encoding, $bypass = false) {
499
+		// All calls to iconv here are unsafe, proof by case analysis:
500
+		// If ICONV_OK, no difference.
501
+		// If ICONV_TRUNCATES, all calls involve one-character inputs,
502
+		// so bug is not triggered.
503
+		// If ICONV_UNUSABLE, this call is irrelevant
504
+		static $encodings = array();
505
+		if (!$bypass) {
506
+			if (isset($encodings[$encoding])) return $encodings[$encoding];
507
+			$lenc = strtolower($encoding);
508
+			switch ($lenc) {
509
+				case 'shift_jis':
510
+					return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
511
+				case 'johab':
512
+					return array("\xE2\x82\xA9" => '\\');
513
+			}
514
+			if (strpos($lenc, 'iso-8859-') === 0) return array();
515
+		}
516
+		$ret = array();
517
+		if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
518
+		for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
519
+			$c = chr($i); // UTF-8 char
520
+			$r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
521
+			if (
522
+				$r === '' ||
523
+				// This line is needed for iconv implementations that do not
524
+				// omit characters that do not exist in the target character set
525
+				($r === $c && self::unsafeIconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
526
+			) {
527
+				// Reverse engineer: what's the UTF-8 equiv of this byte
528
+				// sequence? This assumes that there's no variable width
529
+				// encoding that doesn't support ASCII.
530
+				$ret[self::unsafeIconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
531
+			}
532
+		}
533
+		$encodings[$encoding] = $ret;
534
+		return $ret;
535
+	}
536 536
 
537 537
 
538 538
 }
Please login to merge, or discard this patch.
Braces   +33 added lines, -11 removed lines patch added patch discarded remove patch
@@ -314,9 +314,15 @@  discard block
 block discarded – undo
314 314
         }
315 315
         // set up the actual character
316 316
         $ret = '';
317
-        if($w) $ret .= chr($w);
318
-        if($z) $ret .= chr($z);
319
-        if($y) $ret .= chr($y);
317
+        if($w) {
318
+        	$ret .= chr($w);
319
+        }
320
+        if($z) {
321
+        	$ret .= chr($z);
322
+        }
323
+        if($y) {
324
+        	$ret .= chr($y);
325
+        }
320 326
         $ret .= chr($x);
321 327
 
322 328
         return $ret;
@@ -335,9 +341,13 @@  discard block
 block discarded – undo
335 341
      */
336 342
     public static function convertToUTF8($str, $config, $context) {
337 343
         $encoding = $config->get('Core.Encoding');
338
-        if ($encoding === 'utf-8') return $str;
344
+        if ($encoding === 'utf-8') {
345
+        	return $str;
346
+        }
339 347
         static $iconv = null;
340
-        if ($iconv === null) $iconv = self::iconvAvailable();
348
+        if ($iconv === null) {
349
+        	$iconv = self::iconvAvailable();
350
+        }
341 351
         if ($iconv && !$config->get('Test.ForceNoIconv')) {
342 352
             // unaffected by bugs, since UTF-8 support all characters
343 353
             $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
@@ -368,15 +378,21 @@  discard block
 block discarded – undo
368 378
         if ($escape = $config->get('Core.EscapeNonASCIICharacters')) {
369 379
             $str = self::convertToASCIIDumbLossless($str);
370 380
         }
371
-        if ($encoding === 'utf-8') return $str;
381
+        if ($encoding === 'utf-8') {
382
+        	return $str;
383
+        }
372 384
         static $iconv = null;
373
-        if ($iconv === null) $iconv = self::iconvAvailable();
385
+        if ($iconv === null) {
386
+        	$iconv = self::iconvAvailable();
387
+        }
374 388
         if ($iconv && !$config->get('Test.ForceNoIconv')) {
375 389
             // Undo our previous fix in convertToUTF8, otherwise iconv will barf
376 390
             $ascii_fix = self::testEncodingSupportsASCII($encoding);
377 391
             if (!$escape && !empty($ascii_fix)) {
378 392
                 $clear_fix = array();
379
-                foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = '';
393
+                foreach ($ascii_fix as $utf8 => $native) {
394
+                	$clear_fix[$utf8] = '';
395
+                }
380 396
                 $str = strtr($str, $clear_fix);
381 397
             }
382 398
             $str = strtr($str, array_flip($ascii_fix));
@@ -503,7 +519,9 @@  discard block
 block discarded – undo
503 519
         // If ICONV_UNUSABLE, this call is irrelevant
504 520
         static $encodings = array();
505 521
         if (!$bypass) {
506
-            if (isset($encodings[$encoding])) return $encodings[$encoding];
522
+            if (isset($encodings[$encoding])) {
523
+            	return $encodings[$encoding];
524
+            }
507 525
             $lenc = strtolower($encoding);
508 526
             switch ($lenc) {
509 527
                 case 'shift_jis':
@@ -511,10 +529,14 @@  discard block
 block discarded – undo
511 529
                 case 'johab':
512 530
                     return array("\xE2\x82\xA9" => '\\');
513 531
             }
514
-            if (strpos($lenc, 'iso-8859-') === 0) return array();
532
+            if (strpos($lenc, 'iso-8859-') === 0) {
533
+            	return array();
534
+            }
515 535
         }
516 536
         $ret = array();
517
-        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) return false;
537
+        if (self::unsafeIconv('UTF-8', $encoding, 'a') === false) {
538
+        	return false;
539
+        }
518 540
         for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
519 541
             $c = chr($i); // UTF-8 char
520 542
             $r = self::unsafeIconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
Please login to merge, or discard this patch.
Spacing   +21 added lines, -21 removed lines patch added patch discarded remove patch
@@ -132,7 +132,7 @@  discard block
 block discarded – undo
132 132
         $char = '';
133 133
 
134 134
         $len = strlen($str);
135
-        for($i = 0; $i < $len; $i++) {
135
+        for ($i = 0; $i < $len; $i++) {
136 136
             $in = ord($str{$i});
137 137
             $char .= $str[$i]; // append byte to char
138 138
             if (0 == $mState) {
@@ -252,7 +252,7 @@  discard block
 block discarded – undo
252 252
                     $mState = 0;
253 253
                     $mUcs4  = 0;
254 254
                     $mBytes = 1;
255
-                    $char ='';
255
+                    $char = '';
256 256
                 }
257 257
             }
258 258
         }
@@ -286,8 +286,8 @@  discard block
 block discarded – undo
286 286
     // +----------+----------+----------+----------+
287 287
 
288 288
     public static function unichr($code) {
289
-        if($code > 1114111 or $code < 0 or
290
-          ($code >= 55296 and $code <= 57343) ) {
289
+        if ($code > 1114111 or $code < 0 or
290
+          ($code >= 55296 and $code <= 57343)) {
291 291
             // bits are set outside the "valid" range as defined
292 292
             // by UNICODE 4.1.0
293 293
             return '';
@@ -304,19 +304,19 @@  discard block
 block discarded – undo
304 304
                 $y = (($code & 2047) >> 6) | 192;
305 305
             } else {
306 306
                 $y = (($code & 4032) >> 6) | 128;
307
-                if($code < 65536) {
307
+                if ($code < 65536) {
308 308
                     $z = (($code >> 12) & 15) | 224;
309 309
                 } else {
310 310
                     $z = (($code >> 12) & 63) | 128;
311
-                    $w = (($code >> 18) & 7)  | 240;
311
+                    $w = (($code >> 18) & 7) | 240;
312 312
                 }
313 313
             }
314 314
         }
315 315
         // set up the actual character
316 316
         $ret = '';
317
-        if($w) $ret .= chr($w);
318
-        if($z) $ret .= chr($z);
319
-        if($y) $ret .= chr($y);
317
+        if ($w) $ret .= chr($w);
318
+        if ($z) $ret .= chr($z);
319
+        if ($y) $ret .= chr($y);
320 320
         $ret .= chr($x);
321 321
 
322 322
         return $ret;
@@ -343,7 +343,7 @@  discard block
 block discarded – undo
343 343
             $str = self::unsafeIconv($encoding, 'utf-8//IGNORE', $str);
344 344
             if ($str === false) {
345 345
                 // $encoding is not a valid encoding
346
-                trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR);
346
+                trigger_error('Invalid encoding '.$encoding, E_USER_ERROR);
347 347
                 return '';
348 348
             }
349 349
             // If the string is bjorked by Shift_JIS or a similar encoding
@@ -381,7 +381,7 @@  discard block
 block discarded – undo
381 381
             }
382 382
             $str = strtr($str, array_flip($ascii_fix));
383 383
             // Normal stuff
384
-            $str = self::iconv('utf-8', $encoding . '//IGNORE', $str);
384
+            $str = self::iconv('utf-8', $encoding.'//IGNORE', $str);
385 385
             return $str;
386 386
         } elseif ($encoding === 'iso-8859-1') {
387 387
             $str = utf8_decode($str);
@@ -415,22 +415,22 @@  discard block
 block discarded – undo
415 415
         $result = '';
416 416
         $working = 0;
417 417
         $len = strlen($str);
418
-        for( $i = 0; $i < $len; $i++ ) {
419
-            $bytevalue = ord( $str[$i] );
420
-            if( $bytevalue <= 0x7F ) { //0xxx xxxx
421
-                $result .= chr( $bytevalue );
418
+        for ($i = 0; $i < $len; $i++) {
419
+            $bytevalue = ord($str[$i]);
420
+            if ($bytevalue <= 0x7F) { //0xxx xxxx
421
+                $result .= chr($bytevalue);
422 422
                 $bytesleft = 0;
423
-            } elseif( $bytevalue <= 0xBF ) { //10xx xxxx
423
+            } elseif ($bytevalue <= 0xBF) { //10xx xxxx
424 424
                 $working = $working << 6;
425 425
                 $working += ($bytevalue & 0x3F);
426 426
                 $bytesleft--;
427
-                if( $bytesleft <= 0 ) {
428
-                    $result .= "&#" . $working . ";";
427
+                if ($bytesleft <= 0) {
428
+                    $result .= "&#".$working.";";
429 429
                 }
430
-            } elseif( $bytevalue <= 0xDF ) { //110x xxxx
430
+            } elseif ($bytevalue <= 0xDF) { //110x xxxx
431 431
                 $working = $bytevalue & 0x1F;
432 432
                 $bytesleft = 1;
433
-            } elseif( $bytevalue <= 0xEF ) { //1110 xxxx
433
+            } elseif ($bytevalue <= 0xEF) { //1110 xxxx
434 434
                 $working = $bytevalue & 0x0F;
435 435
                 $bytesleft = 2;
436 436
             } else { //1111 0xxx
@@ -470,7 +470,7 @@  discard block
 block discarded – undo
470 470
         static $code = null;
471 471
         if ($code === null) {
472 472
             // better not use iconv, otherwise infinite loop!
473
-            $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1" . str_repeat('a', 9000));
473
+            $r = self::unsafeIconv('utf-8', 'ascii//IGNORE', "\xCE\xB1".str_repeat('a', 9000));
474 474
             if ($r === false) {
475 475
                 $code = self::ICONV_UNUSABLE;
476 476
             } elseif (($c = strlen($r)) < 9000) {
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/ErrorCollector.php 4 patches
Doc Comments   +3 added lines patch added patch discarded remove patch
@@ -25,6 +25,9 @@
 block discarded – undo
25 25
 
26 26
     protected $lines = array();
27 27
 
28
+    /**
29
+     * @param HTMLPurifier_Context $context
30
+     */
28 31
     public function __construct($context) {
29 32
         $this->locale    =& $context->get('Locale');
30 33
         $this->context   = $context;
Please login to merge, or discard this patch.
Indentation   +196 added lines, -196 removed lines patch added patch discarded remove patch
@@ -7,202 +7,202 @@
 block discarded – undo
7 7
 class HTMLPurifier_ErrorCollector
8 8
 {
9 9
 
10
-    /**
11
-     * Identifiers for the returned error array. These are purposely numeric
12
-     * so list() can be used.
13
-     */
14
-    const LINENO   = 0;
15
-    const SEVERITY = 1;
16
-    const MESSAGE  = 2;
17
-    const CHILDREN = 3;
18
-
19
-    protected $errors;
20
-    protected $_current;
21
-    protected $_stacks = array(array());
22
-    protected $locale;
23
-    protected $generator;
24
-    protected $context;
25
-
26
-    protected $lines = array();
27
-
28
-    public function __construct($context) {
29
-        $this->locale    =& $context->get('Locale');
30
-        $this->context   = $context;
31
-        $this->_current  =& $this->_stacks[0];
32
-        $this->errors    =& $this->_stacks[0];
33
-    }
34
-
35
-    /**
36
-     * Sends an error message to the collector for later use
37
-     * @param $severity int Error severity, PHP error style (don't use E_USER_)
38
-     * @param $msg string Error message text
39
-     * @param $subst1 string First substitution for $msg
40
-     * @param $subst2 string ...
41
-     */
42
-    public function send($severity, $msg) {
43
-
44
-        $args = array();
45
-        if (func_num_args() > 2) {
46
-            $args = func_get_args();
47
-            array_shift($args);
48
-            unset($args[0]);
49
-        }
50
-
51
-        $token = $this->context->get('CurrentToken', true);
52
-        $line  = $token ? $token->line : $this->context->get('CurrentLine', true);
53
-        $col   = $token ? $token->col  : $this->context->get('CurrentCol',  true);
54
-        $attr  = $this->context->get('CurrentAttr', true);
55
-
56
-        // perform special substitutions, also add custom parameters
57
-        $subst = array();
58
-        if (!is_null($token)) {
59
-            $args['CurrentToken'] = $token;
60
-        }
61
-        if (!is_null($attr)) {
62
-            $subst['$CurrentAttr.Name'] = $attr;
63
-            if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
64
-        }
65
-
66
-        if (empty($args)) {
67
-            $msg = $this->locale->getMessage($msg);
68
-        } else {
69
-            $msg = $this->locale->formatMessage($msg, $args);
70
-        }
71
-
72
-        if (!empty($subst)) $msg = strtr($msg, $subst);
73
-
74
-        // (numerically indexed)
75
-        $error = array(
76
-            self::LINENO   => $line,
77
-            self::SEVERITY => $severity,
78
-            self::MESSAGE  => $msg,
79
-            self::CHILDREN => array()
80
-        );
81
-        $this->_current[] = $error;
82
-
83
-
84
-        // NEW CODE BELOW ...
85
-
86
-        $struct = null;
87
-        // Top-level errors are either:
88
-        //  TOKEN type, if $value is set appropriately, or
89
-        //  "syntax" type, if $value is null
90
-        $new_struct = new HTMLPurifier_ErrorStruct();
91
-        $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
92
-        if ($token) $new_struct->value = clone $token;
93
-        if (is_int($line) && is_int($col)) {
94
-            if (isset($this->lines[$line][$col])) {
95
-                $struct = $this->lines[$line][$col];
96
-            } else {
97
-                $struct = $this->lines[$line][$col] = $new_struct;
98
-            }
99
-            // These ksorts may present a performance problem
100
-            ksort($this->lines[$line], SORT_NUMERIC);
101
-        } else {
102
-            if (isset($this->lines[-1])) {
103
-                $struct = $this->lines[-1];
104
-            } else {
105
-                $struct = $this->lines[-1] = $new_struct;
106
-            }
107
-        }
108
-        ksort($this->lines, SORT_NUMERIC);
109
-
110
-        // Now, check if we need to operate on a lower structure
111
-        if (!empty($attr)) {
112
-            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
113
-            if (!$struct->value) {
114
-                $struct->value = array($attr, 'PUT VALUE HERE');
115
-            }
116
-        }
117
-        if (!empty($cssprop)) {
118
-            $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
119
-            if (!$struct->value) {
120
-                // if we tokenize CSS this might be a little more difficult to do
121
-                $struct->value = array($cssprop, 'PUT VALUE HERE');
122
-            }
123
-        }
124
-
125
-        // Ok, structs are all setup, now time to register the error
126
-        $struct->addError($severity, $msg);
127
-    }
128
-
129
-    /**
130
-     * Retrieves raw error data for custom formatter to use
131
-     * @param List of arrays in format of array(line of error,
132
-     *        error severity, error message,
133
-     *        recursive sub-errors array)
134
-     */
135
-    public function getRaw() {
136
-        return $this->errors;
137
-    }
138
-
139
-    /**
140
-     * Default HTML formatting implementation for error messages
141
-     * @param $config Configuration array, vital for HTML output nature
142
-     * @param $errors Errors array to display; used for recursion.
143
-     */
144
-    public function getHTMLFormatted($config, $errors = null) {
145
-        $ret = array();
146
-
147
-        $this->generator = new HTMLPurifier_Generator($config, $this->context);
148
-        if ($errors === null) $errors = $this->errors;
149
-
150
-        // 'At line' message needs to be removed
151
-
152
-        // generation code for new structure goes here. It needs to be recursive.
153
-        foreach ($this->lines as $line => $col_array) {
154
-            if ($line == -1) continue;
155
-            foreach ($col_array as $col => $struct) {
156
-                $this->_renderStruct($ret, $struct, $line, $col);
157
-            }
158
-        }
159
-        if (isset($this->lines[-1])) {
160
-            $this->_renderStruct($ret, $this->lines[-1]);
161
-        }
162
-
163
-        if (empty($errors)) {
164
-            return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
165
-        } else {
166
-            return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
167
-        }
168
-
169
-    }
170
-
171
-    private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
172
-        $stack = array($struct);
173
-        $context_stack = array(array());
174
-        while ($current = array_pop($stack)) {
175
-            $context = array_pop($context_stack);
176
-            foreach ($current->errors as $error) {
177
-                list($severity, $msg) = $error;
178
-                $string = '';
179
-                $string .= '<div>';
180
-                // W3C uses an icon to indicate the severity of the error.
181
-                $error = $this->locale->getErrorName($severity);
182
-                $string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
183
-                if (!is_null($line) && !is_null($col)) {
184
-                    $string .= "<em class=\"location\">Line $line, Column $col: </em> ";
185
-                } else {
186
-                    $string .= '<em class="location">End of Document: </em> ';
187
-                }
188
-                $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
189
-                $string .= '</div>';
190
-                // Here, have a marker for the character on the column appropriate.
191
-                // Be sure to clip extremely long lines.
192
-                //$string .= '<pre>';
193
-                //$string .= '';
194
-                //$string .= '</pre>';
195
-                $ret[] = $string;
196
-            }
197
-            foreach ($current->children as $type => $array) {
198
-                $context[] = $current;
199
-                $stack = array_merge($stack, array_reverse($array, true));
200
-                for ($i = count($array); $i > 0; $i--) {
201
-                    $context_stack[] = $context;
202
-                }
203
-            }
204
-        }
205
-    }
10
+	/**
11
+	 * Identifiers for the returned error array. These are purposely numeric
12
+	 * so list() can be used.
13
+	 */
14
+	const LINENO   = 0;
15
+	const SEVERITY = 1;
16
+	const MESSAGE  = 2;
17
+	const CHILDREN = 3;
18
+
19
+	protected $errors;
20
+	protected $_current;
21
+	protected $_stacks = array(array());
22
+	protected $locale;
23
+	protected $generator;
24
+	protected $context;
25
+
26
+	protected $lines = array();
27
+
28
+	public function __construct($context) {
29
+		$this->locale    =& $context->get('Locale');
30
+		$this->context   = $context;
31
+		$this->_current  =& $this->_stacks[0];
32
+		$this->errors    =& $this->_stacks[0];
33
+	}
34
+
35
+	/**
36
+	 * Sends an error message to the collector for later use
37
+	 * @param $severity int Error severity, PHP error style (don't use E_USER_)
38
+	 * @param $msg string Error message text
39
+	 * @param $subst1 string First substitution for $msg
40
+	 * @param $subst2 string ...
41
+	 */
42
+	public function send($severity, $msg) {
43
+
44
+		$args = array();
45
+		if (func_num_args() > 2) {
46
+			$args = func_get_args();
47
+			array_shift($args);
48
+			unset($args[0]);
49
+		}
50
+
51
+		$token = $this->context->get('CurrentToken', true);
52
+		$line  = $token ? $token->line : $this->context->get('CurrentLine', true);
53
+		$col   = $token ? $token->col  : $this->context->get('CurrentCol',  true);
54
+		$attr  = $this->context->get('CurrentAttr', true);
55
+
56
+		// perform special substitutions, also add custom parameters
57
+		$subst = array();
58
+		if (!is_null($token)) {
59
+			$args['CurrentToken'] = $token;
60
+		}
61
+		if (!is_null($attr)) {
62
+			$subst['$CurrentAttr.Name'] = $attr;
63
+			if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
64
+		}
65
+
66
+		if (empty($args)) {
67
+			$msg = $this->locale->getMessage($msg);
68
+		} else {
69
+			$msg = $this->locale->formatMessage($msg, $args);
70
+		}
71
+
72
+		if (!empty($subst)) $msg = strtr($msg, $subst);
73
+
74
+		// (numerically indexed)
75
+		$error = array(
76
+			self::LINENO   => $line,
77
+			self::SEVERITY => $severity,
78
+			self::MESSAGE  => $msg,
79
+			self::CHILDREN => array()
80
+		);
81
+		$this->_current[] = $error;
82
+
83
+
84
+		// NEW CODE BELOW ...
85
+
86
+		$struct = null;
87
+		// Top-level errors are either:
88
+		//  TOKEN type, if $value is set appropriately, or
89
+		//  "syntax" type, if $value is null
90
+		$new_struct = new HTMLPurifier_ErrorStruct();
91
+		$new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
92
+		if ($token) $new_struct->value = clone $token;
93
+		if (is_int($line) && is_int($col)) {
94
+			if (isset($this->lines[$line][$col])) {
95
+				$struct = $this->lines[$line][$col];
96
+			} else {
97
+				$struct = $this->lines[$line][$col] = $new_struct;
98
+			}
99
+			// These ksorts may present a performance problem
100
+			ksort($this->lines[$line], SORT_NUMERIC);
101
+		} else {
102
+			if (isset($this->lines[-1])) {
103
+				$struct = $this->lines[-1];
104
+			} else {
105
+				$struct = $this->lines[-1] = $new_struct;
106
+			}
107
+		}
108
+		ksort($this->lines, SORT_NUMERIC);
109
+
110
+		// Now, check if we need to operate on a lower structure
111
+		if (!empty($attr)) {
112
+			$struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr);
113
+			if (!$struct->value) {
114
+				$struct->value = array($attr, 'PUT VALUE HERE');
115
+			}
116
+		}
117
+		if (!empty($cssprop)) {
118
+			$struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop);
119
+			if (!$struct->value) {
120
+				// if we tokenize CSS this might be a little more difficult to do
121
+				$struct->value = array($cssprop, 'PUT VALUE HERE');
122
+			}
123
+		}
124
+
125
+		// Ok, structs are all setup, now time to register the error
126
+		$struct->addError($severity, $msg);
127
+	}
128
+
129
+	/**
130
+	 * Retrieves raw error data for custom formatter to use
131
+	 * @param List of arrays in format of array(line of error,
132
+	 *        error severity, error message,
133
+	 *        recursive sub-errors array)
134
+	 */
135
+	public function getRaw() {
136
+		return $this->errors;
137
+	}
138
+
139
+	/**
140
+	 * Default HTML formatting implementation for error messages
141
+	 * @param $config Configuration array, vital for HTML output nature
142
+	 * @param $errors Errors array to display; used for recursion.
143
+	 */
144
+	public function getHTMLFormatted($config, $errors = null) {
145
+		$ret = array();
146
+
147
+		$this->generator = new HTMLPurifier_Generator($config, $this->context);
148
+		if ($errors === null) $errors = $this->errors;
149
+
150
+		// 'At line' message needs to be removed
151
+
152
+		// generation code for new structure goes here. It needs to be recursive.
153
+		foreach ($this->lines as $line => $col_array) {
154
+			if ($line == -1) continue;
155
+			foreach ($col_array as $col => $struct) {
156
+				$this->_renderStruct($ret, $struct, $line, $col);
157
+			}
158
+		}
159
+		if (isset($this->lines[-1])) {
160
+			$this->_renderStruct($ret, $this->lines[-1]);
161
+		}
162
+
163
+		if (empty($errors)) {
164
+			return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
165
+		} else {
166
+			return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
167
+		}
168
+
169
+	}
170
+
171
+	private function _renderStruct(&$ret, $struct, $line = null, $col = null) {
172
+		$stack = array($struct);
173
+		$context_stack = array(array());
174
+		while ($current = array_pop($stack)) {
175
+			$context = array_pop($context_stack);
176
+			foreach ($current->errors as $error) {
177
+				list($severity, $msg) = $error;
178
+				$string = '';
179
+				$string .= '<div>';
180
+				// W3C uses an icon to indicate the severity of the error.
181
+				$error = $this->locale->getErrorName($severity);
182
+				$string .= "<span class=\"error e$severity\"><strong>$error</strong></span> ";
183
+				if (!is_null($line) && !is_null($col)) {
184
+					$string .= "<em class=\"location\">Line $line, Column $col: </em> ";
185
+				} else {
186
+					$string .= '<em class="location">End of Document: </em> ';
187
+				}
188
+				$string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
189
+				$string .= '</div>';
190
+				// Here, have a marker for the character on the column appropriate.
191
+				// Be sure to clip extremely long lines.
192
+				//$string .= '<pre>';
193
+				//$string .= '';
194
+				//$string .= '</pre>';
195
+				$ret[] = $string;
196
+			}
197
+			foreach ($current->children as $type => $array) {
198
+				$context[] = $current;
199
+				$stack = array_merge($stack, array_reverse($array, true));
200
+				for ($i = count($array); $i > 0; $i--) {
201
+					$context_stack[] = $context;
202
+				}
203
+			}
204
+		}
205
+	}
206 206
 
207 207
 }
208 208
 
Please login to merge, or discard this patch.
Braces   +15 added lines, -5 removed lines patch added patch discarded remove patch
@@ -60,7 +60,9 @@  discard block
 block discarded – undo
60 60
         }
61 61
         if (!is_null($attr)) {
62 62
             $subst['$CurrentAttr.Name'] = $attr;
63
-            if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr];
63
+            if (isset($token->attr[$attr])) {
64
+            	$subst['$CurrentAttr.Value'] = $token->attr[$attr];
65
+            }
64 66
         }
65 67
 
66 68
         if (empty($args)) {
@@ -69,7 +71,9 @@  discard block
 block discarded – undo
69 71
             $msg = $this->locale->formatMessage($msg, $args);
70 72
         }
71 73
 
72
-        if (!empty($subst)) $msg = strtr($msg, $subst);
74
+        if (!empty($subst)) {
75
+        	$msg = strtr($msg, $subst);
76
+        }
73 77
 
74 78
         // (numerically indexed)
75 79
         $error = array(
@@ -89,7 +93,9 @@  discard block
 block discarded – undo
89 93
         //  "syntax" type, if $value is null
90 94
         $new_struct = new HTMLPurifier_ErrorStruct();
91 95
         $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN;
92
-        if ($token) $new_struct->value = clone $token;
96
+        if ($token) {
97
+        	$new_struct->value = clone $token;
98
+        }
93 99
         if (is_int($line) && is_int($col)) {
94 100
             if (isset($this->lines[$line][$col])) {
95 101
                 $struct = $this->lines[$line][$col];
@@ -145,13 +151,17 @@  discard block
 block discarded – undo
145 151
         $ret = array();
146 152
 
147 153
         $this->generator = new HTMLPurifier_Generator($config, $this->context);
148
-        if ($errors === null) $errors = $this->errors;
154
+        if ($errors === null) {
155
+        	$errors = $this->errors;
156
+        }
149 157
 
150 158
         // 'At line' message needs to be removed
151 159
 
152 160
         // generation code for new structure goes here. It needs to be recursive.
153 161
         foreach ($this->lines as $line => $col_array) {
154
-            if ($line == -1) continue;
162
+            if ($line == -1) {
163
+            	continue;
164
+            }
155 165
             foreach ($col_array as $col => $struct) {
156 166
                 $this->_renderStruct($ret, $struct, $line, $col);
157 167
             }
Please login to merge, or discard this patch.
Spacing   +7 added lines, -7 removed lines patch added patch discarded remove patch
@@ -26,10 +26,10 @@  discard block
 block discarded – undo
26 26
     protected $lines = array();
27 27
 
28 28
     public function __construct($context) {
29
-        $this->locale    =& $context->get('Locale');
29
+        $this->locale    = & $context->get('Locale');
30 30
         $this->context   = $context;
31
-        $this->_current  =& $this->_stacks[0];
32
-        $this->errors    =& $this->_stacks[0];
31
+        $this->_current  = & $this->_stacks[0];
32
+        $this->errors    = & $this->_stacks[0];
33 33
     }
34 34
 
35 35
     /**
@@ -50,7 +50,7 @@  discard block
 block discarded – undo
50 50
 
51 51
         $token = $this->context->get('CurrentToken', true);
52 52
         $line  = $token ? $token->line : $this->context->get('CurrentLine', true);
53
-        $col   = $token ? $token->col  : $this->context->get('CurrentCol',  true);
53
+        $col   = $token ? $token->col : $this->context->get('CurrentCol', true);
54 54
         $attr  = $this->context->get('CurrentAttr', true);
55 55
 
56 56
         // perform special substitutions, also add custom parameters
@@ -161,9 +161,9 @@  discard block
 block discarded – undo
161 161
         }
162 162
 
163 163
         if (empty($errors)) {
164
-            return '<p>' . $this->locale->getMessage('ErrorCollector: No errors') . '</p>';
164
+            return '<p>'.$this->locale->getMessage('ErrorCollector: No errors').'</p>';
165 165
         } else {
166
-            return '<ul><li>' . implode('</li><li>', $ret) . '</li></ul>';
166
+            return '<ul><li>'.implode('</li><li>', $ret).'</li></ul>';
167 167
         }
168 168
 
169 169
     }
@@ -185,7 +185,7 @@  discard block
 block discarded – undo
185 185
                 } else {
186 186
                     $string .= '<em class="location">End of Document: </em> ';
187 187
                 }
188
-                $string .= '<strong class="description">' . $this->generator->escape($msg) . '</strong> ';
188
+                $string .= '<strong class="description">'.$this->generator->escape($msg).'</strong> ';
189 189
                 $string .= '</div>';
190 190
                 // Here, have a marker for the character on the column appropriate.
191 191
                 // Be sure to clip extremely long lines.
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/Generator.php 4 patches
Doc Comments   +4 added lines, -4 removed lines patch added patch discarded remove patch
@@ -70,7 +70,7 @@  discard block
 block discarded – undo
70 70
      * Generates HTML from an array of tokens.
71 71
      * @param $tokens Array of HTMLPurifier_Token
72 72
      * @param $config HTMLPurifier_Config object
73
-     * @return Generated HTML
73
+     * @return string HTML
74 74
      */
75 75
     public function generateFromTokens($tokens) {
76 76
         if (!$tokens) return '';
@@ -115,7 +115,7 @@  discard block
 block discarded – undo
115 115
     /**
116 116
      * Generates HTML from a single token.
117 117
      * @param $token HTMLPurifier_Token object.
118
-     * @return Generated HTML
118
+     * @return string HTML
119 119
      */
120 120
     public function generateFromToken($token) {
121 121
         if (!$token instanceof HTMLPurifier_Token) {
@@ -181,7 +181,7 @@  discard block
 block discarded – undo
181 181
      * @param $assoc_array_of_attributes Attribute array
182 182
      * @param $element Name of element attributes are for, used to check
183 183
      *        attribute minimization.
184
-     * @return Generate HTML fragment for insertion.
184
+     * @return string HTML fragment for insertion.
185 185
      */
186 186
     public function generateAttributes($assoc_array_of_attributes, $element = false) {
187 187
         $html = '';
@@ -238,7 +238,7 @@  discard block
 block discarded – undo
238 238
      *       for properly generating HTML here w/o using tokens, it stays
239 239
      *       public.
240 240
      * @param $string String data to escape for HTML.
241
-     * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
241
+     * @param integer $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
242 242
      *               permissible for non-attribute output.
243 243
      * @return String escaped data.
244 244
      */
Please login to merge, or discard this patch.
Indentation   +238 added lines, -238 removed lines patch added patch discarded remove patch
@@ -10,244 +10,244 @@
 block discarded – undo
10 10
 class HTMLPurifier_Generator
11 11
 {
12 12
 
13
-    /**
14
-     * Whether or not generator should produce XML output
15
-     */
16
-    private $_xhtml = true;
17
-
18
-    /**
19
-     * :HACK: Whether or not generator should comment the insides of <script> tags
20
-     */
21
-    private $_scriptFix = false;
22
-
23
-    /**
24
-     * Cache of HTMLDefinition during HTML output to determine whether or
25
-     * not attributes should be minimized.
26
-     */
27
-    private $_def;
28
-
29
-    /**
30
-     * Cache of %Output.SortAttr
31
-     */
32
-    private $_sortAttr;
33
-
34
-    /**
35
-     * Cache of %Output.FlashCompat
36
-     */
37
-    private $_flashCompat;
38
-
39
-    /**
40
-     * Cache of %Output.FixInnerHTML
41
-     */
42
-    private $_innerHTMLFix;
43
-
44
-    /**
45
-     * Stack for keeping track of object information when outputting IE
46
-     * compatibility code.
47
-     */
48
-    private $_flashStack = array();
49
-
50
-    /**
51
-     * Configuration for the generator
52
-     */
53
-    protected $config;
54
-
55
-    /**
56
-     * @param $config Instance of HTMLPurifier_Config
57
-     * @param $context Instance of HTMLPurifier_Context
58
-     */
59
-    public function __construct($config, $context) {
60
-        $this->config = $config;
61
-        $this->_scriptFix = $config->get('Output.CommentScriptContents');
62
-        $this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
63
-        $this->_sortAttr = $config->get('Output.SortAttr');
64
-        $this->_flashCompat = $config->get('Output.FlashCompat');
65
-        $this->_def = $config->getHTMLDefinition();
66
-        $this->_xhtml = $this->_def->doctype->xml;
67
-    }
68
-
69
-    /**
70
-     * Generates HTML from an array of tokens.
71
-     * @param $tokens Array of HTMLPurifier_Token
72
-     * @param $config HTMLPurifier_Config object
73
-     * @return Generated HTML
74
-     */
75
-    public function generateFromTokens($tokens) {
76
-        if (!$tokens) return '';
77
-
78
-        // Basic algorithm
79
-        $html = '';
80
-        for ($i = 0, $size = count($tokens); $i < $size; $i++) {
81
-            if ($this->_scriptFix && $tokens[$i]->name === 'script'
82
-                && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
83
-                // script special case
84
-                // the contents of the script block must be ONE token
85
-                // for this to work.
86
-                $html .= $this->generateFromToken($tokens[$i++]);
87
-                $html .= $this->generateScriptFromToken($tokens[$i++]);
88
-            }
89
-            $html .= $this->generateFromToken($tokens[$i]);
90
-        }
91
-
92
-        // Tidy cleanup
93
-        if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
94
-            $tidy = new Tidy;
95
-            $tidy->parseString($html, array(
96
-               'indent'=> true,
97
-               'output-xhtml' => $this->_xhtml,
98
-               'show-body-only' => true,
99
-               'indent-spaces' => 2,
100
-               'wrap' => 68,
101
-            ), 'utf8');
102
-            $tidy->cleanRepair();
103
-            $html = (string) $tidy; // explicit cast necessary
104
-        }
105
-
106
-        // Normalize newlines to system defined value
107
-        if ($this->config->get('Core.NormalizeNewlines')) {
108
-            $nl = $this->config->get('Output.Newline');
109
-            if ($nl === null) $nl = PHP_EOL;
110
-            if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
111
-        }
112
-        return $html;
113
-    }
114
-
115
-    /**
116
-     * Generates HTML from a single token.
117
-     * @param $token HTMLPurifier_Token object.
118
-     * @return Generated HTML
119
-     */
120
-    public function generateFromToken($token) {
121
-        if (!$token instanceof HTMLPurifier_Token) {
122
-            trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
123
-            return '';
124
-
125
-        } elseif ($token instanceof HTMLPurifier_Token_Start) {
126
-            $attr = $this->generateAttributes($token->attr, $token->name);
127
-            if ($this->_flashCompat) {
128
-                if ($token->name == "object") {
129
-                    $flash = new stdclass();
130
-                    $flash->attr = $token->attr;
131
-                    $flash->param = array();
132
-                    $this->_flashStack[] = $flash;
133
-                }
134
-            }
135
-            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
136
-
137
-        } elseif ($token instanceof HTMLPurifier_Token_End) {
138
-            $_extra = '';
139
-            if ($this->_flashCompat) {
140
-                if ($token->name == "object" && !empty($this->_flashStack)) {
141
-                    // doesn't do anything for now
142
-                }
143
-            }
144
-            return $_extra . '</' . $token->name . '>';
145
-
146
-        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
147
-            if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
148
-                $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
149
-            }
150
-            $attr = $this->generateAttributes($token->attr, $token->name);
151
-             return '<' . $token->name . ($attr ? ' ' : '') . $attr .
152
-                ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
153
-                . '>';
154
-
155
-        } elseif ($token instanceof HTMLPurifier_Token_Text) {
156
-            return $this->escape($token->data, ENT_NOQUOTES);
157
-
158
-        } elseif ($token instanceof HTMLPurifier_Token_Comment) {
159
-            return '<!--' . $token->data . '-->';
160
-        } else {
161
-            return '';
162
-
163
-        }
164
-    }
165
-
166
-    /**
167
-     * Special case processor for the contents of script tags
168
-     * @warning This runs into problems if there's already a literal
169
-     *          --> somewhere inside the script contents.
170
-     */
171
-    public function generateScriptFromToken($token) {
172
-        if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
173
-        // Thanks <http://lachy.id.au/log/2005/05/script-comments>
174
-        $data = preg_replace('#//\s*$#', '', $token->data);
175
-        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
176
-    }
177
-
178
-    /**
179
-     * Generates attribute declarations from attribute array.
180
-     * @note This does not include the leading or trailing space.
181
-     * @param $assoc_array_of_attributes Attribute array
182
-     * @param $element Name of element attributes are for, used to check
183
-     *        attribute minimization.
184
-     * @return Generate HTML fragment for insertion.
185
-     */
186
-    public function generateAttributes($assoc_array_of_attributes, $element = false) {
187
-        $html = '';
188
-        if ($this->_sortAttr) ksort($assoc_array_of_attributes);
189
-        foreach ($assoc_array_of_attributes as $key => $value) {
190
-            if (!$this->_xhtml) {
191
-                // Remove namespaced attributes
192
-                if (strpos($key, ':') !== false) continue;
193
-                // Check if we should minimize the attribute: val="val" -> val
194
-                if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
195
-                    $html .= $key . ' ';
196
-                    continue;
197
-                }
198
-            }
199
-            // Workaround for Internet Explorer innerHTML bug.
200
-            // Essentially, Internet Explorer, when calculating
201
-            // innerHTML, omits quotes if there are no instances of
202
-            // angled brackets, quotes or spaces.  However, when parsing
203
-            // HTML (for example, when you assign to innerHTML), it
204
-            // treats backticks as quotes.  Thus,
205
-            //      <img alt="``" />
206
-            // becomes
207
-            //      <img alt=`` />
208
-            // becomes
209
-            //      <img alt='' />
210
-            // Fortunately, all we need to do is trigger an appropriate
211
-            // quoting style, which we do by adding an extra space.
212
-            // This also is consistent with the W3C spec, which states
213
-            // that user agents may ignore leading or trailing
214
-            // whitespace (in fact, most don't, at least for attributes
215
-            // like alt, but an extra space at the end is barely
216
-            // noticeable).  Still, we have a configuration knob for
217
-            // this, since this transformation is not necesary if you
218
-            // don't process user input with innerHTML or you don't plan
219
-            // on supporting Internet Explorer.
220
-            if ($this->_innerHTMLFix) {
221
-                if (strpos($value, '`') !== false) {
222
-                    // check if correct quoting style would not already be
223
-                    // triggered
224
-                    if (strcspn($value, '"\' <>') === strlen($value)) {
225
-                        // protect!
226
-                        $value .= ' ';
227
-                    }
228
-                }
229
-            }
230
-            $html .= $key.'="'.$this->escape($value).'" ';
231
-        }
232
-        return rtrim($html);
233
-    }
234
-
235
-    /**
236
-     * Escapes raw text data.
237
-     * @todo This really ought to be protected, but until we have a facility
238
-     *       for properly generating HTML here w/o using tokens, it stays
239
-     *       public.
240
-     * @param $string String data to escape for HTML.
241
-     * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
242
-     *               permissible for non-attribute output.
243
-     * @return String escaped data.
244
-     */
245
-    public function escape($string, $quote = null) {
246
-        // Workaround for APC bug on Mac Leopard reported by sidepodcast
247
-        // http://htmlpurifier.org/phorum/read.php?3,4823,4846
248
-        if ($quote === null) $quote = ENT_COMPAT;
249
-        return htmlspecialchars($string, $quote, 'UTF-8', false);
250
-    }
13
+	/**
14
+	 * Whether or not generator should produce XML output
15
+	 */
16
+	private $_xhtml = true;
17
+
18
+	/**
19
+	 * :HACK: Whether or not generator should comment the insides of <script> tags
20
+	 */
21
+	private $_scriptFix = false;
22
+
23
+	/**
24
+	 * Cache of HTMLDefinition during HTML output to determine whether or
25
+	 * not attributes should be minimized.
26
+	 */
27
+	private $_def;
28
+
29
+	/**
30
+	 * Cache of %Output.SortAttr
31
+	 */
32
+	private $_sortAttr;
33
+
34
+	/**
35
+	 * Cache of %Output.FlashCompat
36
+	 */
37
+	private $_flashCompat;
38
+
39
+	/**
40
+	 * Cache of %Output.FixInnerHTML
41
+	 */
42
+	private $_innerHTMLFix;
43
+
44
+	/**
45
+	 * Stack for keeping track of object information when outputting IE
46
+	 * compatibility code.
47
+	 */
48
+	private $_flashStack = array();
49
+
50
+	/**
51
+	 * Configuration for the generator
52
+	 */
53
+	protected $config;
54
+
55
+	/**
56
+	 * @param $config Instance of HTMLPurifier_Config
57
+	 * @param $context Instance of HTMLPurifier_Context
58
+	 */
59
+	public function __construct($config, $context) {
60
+		$this->config = $config;
61
+		$this->_scriptFix = $config->get('Output.CommentScriptContents');
62
+		$this->_innerHTMLFix = $config->get('Output.FixInnerHTML');
63
+		$this->_sortAttr = $config->get('Output.SortAttr');
64
+		$this->_flashCompat = $config->get('Output.FlashCompat');
65
+		$this->_def = $config->getHTMLDefinition();
66
+		$this->_xhtml = $this->_def->doctype->xml;
67
+	}
68
+
69
+	/**
70
+	 * Generates HTML from an array of tokens.
71
+	 * @param $tokens Array of HTMLPurifier_Token
72
+	 * @param $config HTMLPurifier_Config object
73
+	 * @return Generated HTML
74
+	 */
75
+	public function generateFromTokens($tokens) {
76
+		if (!$tokens) return '';
77
+
78
+		// Basic algorithm
79
+		$html = '';
80
+		for ($i = 0, $size = count($tokens); $i < $size; $i++) {
81
+			if ($this->_scriptFix && $tokens[$i]->name === 'script'
82
+				&& $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
83
+				// script special case
84
+				// the contents of the script block must be ONE token
85
+				// for this to work.
86
+				$html .= $this->generateFromToken($tokens[$i++]);
87
+				$html .= $this->generateScriptFromToken($tokens[$i++]);
88
+			}
89
+			$html .= $this->generateFromToken($tokens[$i]);
90
+		}
91
+
92
+		// Tidy cleanup
93
+		if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) {
94
+			$tidy = new Tidy;
95
+			$tidy->parseString($html, array(
96
+			   'indent'=> true,
97
+			   'output-xhtml' => $this->_xhtml,
98
+			   'show-body-only' => true,
99
+			   'indent-spaces' => 2,
100
+			   'wrap' => 68,
101
+			), 'utf8');
102
+			$tidy->cleanRepair();
103
+			$html = (string) $tidy; // explicit cast necessary
104
+		}
105
+
106
+		// Normalize newlines to system defined value
107
+		if ($this->config->get('Core.NormalizeNewlines')) {
108
+			$nl = $this->config->get('Output.Newline');
109
+			if ($nl === null) $nl = PHP_EOL;
110
+			if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
111
+		}
112
+		return $html;
113
+	}
114
+
115
+	/**
116
+	 * Generates HTML from a single token.
117
+	 * @param $token HTMLPurifier_Token object.
118
+	 * @return Generated HTML
119
+	 */
120
+	public function generateFromToken($token) {
121
+		if (!$token instanceof HTMLPurifier_Token) {
122
+			trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING);
123
+			return '';
124
+
125
+		} elseif ($token instanceof HTMLPurifier_Token_Start) {
126
+			$attr = $this->generateAttributes($token->attr, $token->name);
127
+			if ($this->_flashCompat) {
128
+				if ($token->name == "object") {
129
+					$flash = new stdclass();
130
+					$flash->attr = $token->attr;
131
+					$flash->param = array();
132
+					$this->_flashStack[] = $flash;
133
+				}
134
+			}
135
+			return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
136
+
137
+		} elseif ($token instanceof HTMLPurifier_Token_End) {
138
+			$_extra = '';
139
+			if ($this->_flashCompat) {
140
+				if ($token->name == "object" && !empty($this->_flashStack)) {
141
+					// doesn't do anything for now
142
+				}
143
+			}
144
+			return $_extra . '</' . $token->name . '>';
145
+
146
+		} elseif ($token instanceof HTMLPurifier_Token_Empty) {
147
+			if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
148
+				$this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
149
+			}
150
+			$attr = $this->generateAttributes($token->attr, $token->name);
151
+			 return '<' . $token->name . ($attr ? ' ' : '') . $attr .
152
+				( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
153
+				. '>';
154
+
155
+		} elseif ($token instanceof HTMLPurifier_Token_Text) {
156
+			return $this->escape($token->data, ENT_NOQUOTES);
157
+
158
+		} elseif ($token instanceof HTMLPurifier_Token_Comment) {
159
+			return '<!--' . $token->data . '-->';
160
+		} else {
161
+			return '';
162
+
163
+		}
164
+	}
165
+
166
+	/**
167
+	 * Special case processor for the contents of script tags
168
+	 * @warning This runs into problems if there's already a literal
169
+	 *          --> somewhere inside the script contents.
170
+	 */
171
+	public function generateScriptFromToken($token) {
172
+		if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
173
+		// Thanks <http://lachy.id.au/log/2005/05/script-comments>
174
+		$data = preg_replace('#//\s*$#', '', $token->data);
175
+		return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
176
+	}
177
+
178
+	/**
179
+	 * Generates attribute declarations from attribute array.
180
+	 * @note This does not include the leading or trailing space.
181
+	 * @param $assoc_array_of_attributes Attribute array
182
+	 * @param $element Name of element attributes are for, used to check
183
+	 *        attribute minimization.
184
+	 * @return Generate HTML fragment for insertion.
185
+	 */
186
+	public function generateAttributes($assoc_array_of_attributes, $element = false) {
187
+		$html = '';
188
+		if ($this->_sortAttr) ksort($assoc_array_of_attributes);
189
+		foreach ($assoc_array_of_attributes as $key => $value) {
190
+			if (!$this->_xhtml) {
191
+				// Remove namespaced attributes
192
+				if (strpos($key, ':') !== false) continue;
193
+				// Check if we should minimize the attribute: val="val" -> val
194
+				if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
195
+					$html .= $key . ' ';
196
+					continue;
197
+				}
198
+			}
199
+			// Workaround for Internet Explorer innerHTML bug.
200
+			// Essentially, Internet Explorer, when calculating
201
+			// innerHTML, omits quotes if there are no instances of
202
+			// angled brackets, quotes or spaces.  However, when parsing
203
+			// HTML (for example, when you assign to innerHTML), it
204
+			// treats backticks as quotes.  Thus,
205
+			//      <img alt="``" />
206
+			// becomes
207
+			//      <img alt=`` />
208
+			// becomes
209
+			//      <img alt='' />
210
+			// Fortunately, all we need to do is trigger an appropriate
211
+			// quoting style, which we do by adding an extra space.
212
+			// This also is consistent with the W3C spec, which states
213
+			// that user agents may ignore leading or trailing
214
+			// whitespace (in fact, most don't, at least for attributes
215
+			// like alt, but an extra space at the end is barely
216
+			// noticeable).  Still, we have a configuration knob for
217
+			// this, since this transformation is not necesary if you
218
+			// don't process user input with innerHTML or you don't plan
219
+			// on supporting Internet Explorer.
220
+			if ($this->_innerHTMLFix) {
221
+				if (strpos($value, '`') !== false) {
222
+					// check if correct quoting style would not already be
223
+					// triggered
224
+					if (strcspn($value, '"\' <>') === strlen($value)) {
225
+						// protect!
226
+						$value .= ' ';
227
+					}
228
+				}
229
+			}
230
+			$html .= $key.'="'.$this->escape($value).'" ';
231
+		}
232
+		return rtrim($html);
233
+	}
234
+
235
+	/**
236
+	 * Escapes raw text data.
237
+	 * @todo This really ought to be protected, but until we have a facility
238
+	 *       for properly generating HTML here w/o using tokens, it stays
239
+	 *       public.
240
+	 * @param $string String data to escape for HTML.
241
+	 * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is
242
+	 *               permissible for non-attribute output.
243
+	 * @return String escaped data.
244
+	 */
245
+	public function escape($string, $quote = null) {
246
+		// Workaround for APC bug on Mac Leopard reported by sidepodcast
247
+		// http://htmlpurifier.org/phorum/read.php?3,4823,4846
248
+		if ($quote === null) $quote = ENT_COMPAT;
249
+		return htmlspecialchars($string, $quote, 'UTF-8', false);
250
+	}
251 251
 
252 252
 }
253 253
 
Please login to merge, or discard this patch.
Braces   +21 added lines, -7 removed lines patch added patch discarded remove patch
@@ -73,7 +73,9 @@  discard block
 block discarded – undo
73 73
      * @return Generated HTML
74 74
      */
75 75
     public function generateFromTokens($tokens) {
76
-        if (!$tokens) return '';
76
+        if (!$tokens) {
77
+        	return '';
78
+        }
77 79
 
78 80
         // Basic algorithm
79 81
         $html = '';
@@ -106,8 +108,12 @@  discard block
 block discarded – undo
106 108
         // Normalize newlines to system defined value
107 109
         if ($this->config->get('Core.NormalizeNewlines')) {
108 110
             $nl = $this->config->get('Output.Newline');
109
-            if ($nl === null) $nl = PHP_EOL;
110
-            if ($nl !== "\n") $html = str_replace("\n", $nl, $html);
111
+            if ($nl === null) {
112
+            	$nl = PHP_EOL;
113
+            }
114
+            if ($nl !== "\n") {
115
+            	$html = str_replace("\n", $nl, $html);
116
+            }
111 117
         }
112 118
         return $html;
113 119
     }
@@ -169,7 +175,9 @@  discard block
 block discarded – undo
169 175
      *          --> somewhere inside the script contents.
170 176
      */
171 177
     public function generateScriptFromToken($token) {
172
-        if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
178
+        if (!$token instanceof HTMLPurifier_Token_Text) {
179
+        	return $this->generateFromToken($token);
180
+        }
173 181
         // Thanks <http://lachy.id.au/log/2005/05/script-comments>
174 182
         $data = preg_replace('#//\s*$#', '', $token->data);
175 183
         return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
@@ -185,11 +193,15 @@  discard block
 block discarded – undo
185 193
      */
186 194
     public function generateAttributes($assoc_array_of_attributes, $element = false) {
187 195
         $html = '';
188
-        if ($this->_sortAttr) ksort($assoc_array_of_attributes);
196
+        if ($this->_sortAttr) {
197
+        	ksort($assoc_array_of_attributes);
198
+        }
189 199
         foreach ($assoc_array_of_attributes as $key => $value) {
190 200
             if (!$this->_xhtml) {
191 201
                 // Remove namespaced attributes
192
-                if (strpos($key, ':') !== false) continue;
202
+                if (strpos($key, ':') !== false) {
203
+                	continue;
204
+                }
193 205
                 // Check if we should minimize the attribute: val="val" -> val
194 206
                 if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
195 207
                     $html .= $key . ' ';
@@ -245,7 +257,9 @@  discard block
 block discarded – undo
245 257
     public function escape($string, $quote = null) {
246 258
         // Workaround for APC bug on Mac Leopard reported by sidepodcast
247 259
         // http://htmlpurifier.org/phorum/read.php?3,4823,4846
248
-        if ($quote === null) $quote = ENT_COMPAT;
260
+        if ($quote === null) {
261
+        	$quote = ENT_COMPAT;
262
+        }
249 263
         return htmlspecialchars($string, $quote, 'UTF-8', false);
250 264
     }
251 265
 
Please login to merge, or discard this patch.
Spacing   +9 added lines, -9 removed lines patch added patch discarded remove patch
@@ -79,7 +79,7 @@  discard block
 block discarded – undo
79 79
         $html = '';
80 80
         for ($i = 0, $size = count($tokens); $i < $size; $i++) {
81 81
             if ($this->_scriptFix && $tokens[$i]->name === 'script'
82
-                && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) {
82
+                && $i + 2 < $size && $tokens[$i + 2] instanceof HTMLPurifier_Token_End) {
83 83
                 // script special case
84 84
                 // the contents of the script block must be ONE token
85 85
                 // for this to work.
@@ -132,7 +132,7 @@  discard block
 block discarded – undo
132 132
                     $this->_flashStack[] = $flash;
133 133
                 }
134 134
             }
135
-            return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>';
135
+            return '<'.$token->name.($attr ? ' ' : '').$attr.'>';
136 136
 
137 137
         } elseif ($token instanceof HTMLPurifier_Token_End) {
138 138
             $_extra = '';
@@ -141,22 +141,22 @@  discard block
 block discarded – undo
141 141
                     // doesn't do anything for now
142 142
                 }
143 143
             }
144
-            return $_extra . '</' . $token->name . '>';
144
+            return $_extra.'</'.$token->name.'>';
145 145
 
146 146
         } elseif ($token instanceof HTMLPurifier_Token_Empty) {
147 147
             if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) {
148
-                $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value'];
148
+                $this->_flashStack[count($this->_flashStack) - 1]->param[$token->attr['name']] = $token->attr['value'];
149 149
             }
150 150
             $attr = $this->generateAttributes($token->attr, $token->name);
151
-             return '<' . $token->name . ($attr ? ' ' : '') . $attr .
152
-                ( $this->_xhtml ? ' /': '' ) // <br /> v. <br>
151
+             return '<'.$token->name.($attr ? ' ' : '').$attr.
152
+                ($this->_xhtml ? ' /' : '') // <br /> v. <br>
153 153
                 . '>';
154 154
 
155 155
         } elseif ($token instanceof HTMLPurifier_Token_Text) {
156 156
             return $this->escape($token->data, ENT_NOQUOTES);
157 157
 
158 158
         } elseif ($token instanceof HTMLPurifier_Token_Comment) {
159
-            return '<!--' . $token->data . '-->';
159
+            return '<!--'.$token->data.'-->';
160 160
         } else {
161 161
             return '';
162 162
 
@@ -172,7 +172,7 @@  discard block
 block discarded – undo
172 172
         if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token);
173 173
         // Thanks <http://lachy.id.au/log/2005/05/script-comments>
174 174
         $data = preg_replace('#//\s*$#', '', $token->data);
175
-        return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>';
175
+        return '<!--//--><![CDATA[//><!--'."\n".trim($data)."\n".'//--><!]]>';
176 176
     }
177 177
 
178 178
     /**
@@ -192,7 +192,7 @@  discard block
 block discarded – undo
192 192
                 if (strpos($key, ':') !== false) continue;
193 193
                 // Check if we should minimize the attribute: val="val" -> val
194 194
                 if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) {
195
-                    $html .= $key . ' ';
195
+                    $html .= $key.' ';
196 196
                     continue;
197 197
                 }
198 198
             }
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/HTMLDefinition.php 4 patches
Doc Comments   +1 added lines, -2 removed lines patch added patch discarded remove patch
@@ -385,8 +385,7 @@
 block discarded – undo
385 385
      * separate lists for processing. Format is element[attr1|attr2],element2...
386 386
      * @warning Although it's largely drawn from TinyMCE's implementation,
387 387
      *      it is different, and you'll probably have to modify your lists
388
-     * @param $list String list to parse
389
-     * @param array($allowed_elements, $allowed_attributes)
388
+     * @param string $list String list to parse
390 389
      * @todo Give this its own class, probably static interface
391 390
      */
392 391
     public function parseTinyMCEAllowedList($list) {
Please login to merge, or discard this patch.
Indentation   +392 added lines, -392 removed lines patch added patch discarded remove patch
@@ -26,398 +26,398 @@
 block discarded – undo
26 26
 class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
27 27
 {
28 28
 
29
-    // FULLY-PUBLIC VARIABLES ---------------------------------------------
30
-
31
-    /**
32
-     * Associative array of element names to HTMLPurifier_ElementDef
33
-     */
34
-    public $info = array();
35
-
36
-    /**
37
-     * Associative array of global attribute name to attribute definition.
38
-     */
39
-    public $info_global_attr = array();
40
-
41
-    /**
42
-     * String name of parent element HTML will be going into.
43
-     */
44
-    public $info_parent = 'div';
45
-
46
-    /**
47
-     * Definition for parent element, allows parent element to be a
48
-     * tag that's not allowed inside the HTML fragment.
49
-     */
50
-    public $info_parent_def;
51
-
52
-    /**
53
-     * String name of element used to wrap inline elements in block context
54
-     * @note This is rarely used except for BLOCKQUOTEs in strict mode
55
-     */
56
-    public $info_block_wrapper = 'p';
57
-
58
-    /**
59
-     * Associative array of deprecated tag name to HTMLPurifier_TagTransform
60
-     */
61
-    public $info_tag_transform = array();
62
-
63
-    /**
64
-     * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
65
-     */
66
-    public $info_attr_transform_pre = array();
67
-
68
-    /**
69
-     * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
70
-     */
71
-    public $info_attr_transform_post = array();
72
-
73
-    /**
74
-     * Nested lookup array of content set name (Block, Inline) to
75
-     * element name to whether or not it belongs in that content set.
76
-     */
77
-    public $info_content_sets = array();
78
-
79
-    /**
80
-     * Indexed list of HTMLPurifier_Injector to be used.
81
-     */
82
-    public $info_injector = array();
83
-
84
-    /**
85
-     * Doctype object
86
-     */
87
-    public $doctype;
88
-
89
-
90
-
91
-    // RAW CUSTOMIZATION STUFF --------------------------------------------
92
-
93
-    /**
94
-     * Adds a custom attribute to a pre-existing element
95
-     * @note This is strictly convenience, and does not have a corresponding
96
-     *       method in HTMLPurifier_HTMLModule
97
-     * @param $element_name String element name to add attribute to
98
-     * @param $attr_name String name of attribute
99
-     * @param $def Attribute definition, can be string or object, see
100
-     *             HTMLPurifier_AttrTypes for details
101
-     */
102
-    public function addAttribute($element_name, $attr_name, $def) {
103
-        $module = $this->getAnonymousModule();
104
-        if (!isset($module->info[$element_name])) {
105
-            $element = $module->addBlankElement($element_name);
106
-        } else {
107
-            $element = $module->info[$element_name];
108
-        }
109
-        $element->attr[$attr_name] = $def;
110
-    }
111
-
112
-    /**
113
-     * Adds a custom element to your HTML definition
114
-     * @note See HTMLPurifier_HTMLModule::addElement for detailed
115
-     *       parameter and return value descriptions.
116
-     */
117
-    public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) {
118
-        $module = $this->getAnonymousModule();
119
-        // assume that if the user is calling this, the element
120
-        // is safe. This may not be a good idea
121
-        $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
122
-        return $element;
123
-    }
124
-
125
-    /**
126
-     * Adds a blank element to your HTML definition, for overriding
127
-     * existing behavior
128
-     * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
129
-     *       parameter and return value descriptions.
130
-     */
131
-    public function addBlankElement($element_name) {
132
-        $module  = $this->getAnonymousModule();
133
-        $element = $module->addBlankElement($element_name);
134
-        return $element;
135
-    }
136
-
137
-    /**
138
-     * Retrieves a reference to the anonymous module, so you can
139
-     * bust out advanced features without having to make your own
140
-     * module.
141
-     */
142
-    public function getAnonymousModule() {
143
-        if (!$this->_anonModule) {
144
-            $this->_anonModule = new HTMLPurifier_HTMLModule();
145
-            $this->_anonModule->name = 'Anonymous';
146
-        }
147
-        return $this->_anonModule;
148
-    }
149
-
150
-    private $_anonModule = null;
151
-
152
-
153
-    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
154
-
155
-    public $type = 'HTML';
156
-    public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
157
-
158
-    /**
159
-     * Performs low-cost, preliminary initialization.
160
-     */
161
-    public function __construct() {
162
-        $this->manager = new HTMLPurifier_HTMLModuleManager();
163
-    }
164
-
165
-    protected function doSetup($config) {
166
-        $this->processModules($config);
167
-        $this->setupConfigStuff($config);
168
-        unset($this->manager);
169
-
170
-        // cleanup some of the element definitions
171
-        foreach ($this->info as $k => $v) {
172
-            unset($this->info[$k]->content_model);
173
-            unset($this->info[$k]->content_model_type);
174
-        }
175
-    }
176
-
177
-    /**
178
-     * Extract out the information from the manager
179
-     */
180
-    protected function processModules($config) {
181
-
182
-        if ($this->_anonModule) {
183
-            // for user specific changes
184
-            // this is late-loaded so we don't have to deal with PHP4
185
-            // reference wonky-ness
186
-            $this->manager->addModule($this->_anonModule);
187
-            unset($this->_anonModule);
188
-        }
189
-
190
-        $this->manager->setup($config);
191
-        $this->doctype = $this->manager->doctype;
192
-
193
-        foreach ($this->manager->modules as $module) {
194
-            foreach($module->info_tag_transform as $k => $v) {
195
-                if ($v === false) unset($this->info_tag_transform[$k]);
196
-                else $this->info_tag_transform[$k] = $v;
197
-            }
198
-            foreach($module->info_attr_transform_pre as $k => $v) {
199
-                if ($v === false) unset($this->info_attr_transform_pre[$k]);
200
-                else $this->info_attr_transform_pre[$k] = $v;
201
-            }
202
-            foreach($module->info_attr_transform_post as $k => $v) {
203
-                if ($v === false) unset($this->info_attr_transform_post[$k]);
204
-                else $this->info_attr_transform_post[$k] = $v;
205
-            }
206
-            foreach ($module->info_injector as $k => $v) {
207
-                if ($v === false) unset($this->info_injector[$k]);
208
-                else $this->info_injector[$k] = $v;
209
-            }
210
-        }
211
-
212
-        $this->info = $this->manager->getElements();
213
-        $this->info_content_sets = $this->manager->contentSets->lookup;
214
-
215
-    }
216
-
217
-    /**
218
-     * Sets up stuff based on config. We need a better way of doing this.
219
-     */
220
-    protected function setupConfigStuff($config) {
221
-
222
-        $block_wrapper = $config->get('HTML.BlockWrapper');
223
-        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
224
-            $this->info_block_wrapper = $block_wrapper;
225
-        } else {
226
-            trigger_error('Cannot use non-block element as block wrapper',
227
-                E_USER_ERROR);
228
-        }
229
-
230
-        $parent = $config->get('HTML.Parent');
231
-        $def = $this->manager->getElement($parent, true);
232
-        if ($def) {
233
-            $this->info_parent = $parent;
234
-            $this->info_parent_def = $def;
235
-        } else {
236
-            trigger_error('Cannot use unrecognized element as parent',
237
-                E_USER_ERROR);
238
-            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
239
-        }
240
-
241
-        // support template text
242
-        $support = "(for information on implementing this, see the ".
243
-                   "support forums) ";
244
-
245
-        // setup allowed elements -----------------------------------------
246
-
247
-        $allowed_elements = $config->get('HTML.AllowedElements');
248
-        $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early
249
-
250
-        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
251
-            $allowed = $config->get('HTML.Allowed');
252
-            if (is_string($allowed)) {
253
-                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
254
-            }
255
-        }
256
-
257
-        if (is_array($allowed_elements)) {
258
-            foreach ($this->info as $name => $d) {
259
-                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
260
-                unset($allowed_elements[$name]);
261
-            }
262
-            // emit errors
263
-            foreach ($allowed_elements as $element => $d) {
264
-                $element = htmlspecialchars($element, ENT_COMPAT | ENT_HTML401, 'UTF-8', false); // PHP doesn't escape errors, be careful!
265
-                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
266
-            }
267
-        }
268
-
269
-        // setup allowed attributes ---------------------------------------
270
-
271
-        $allowed_attributes_mutable = $allowed_attributes; // by copy!
272
-        if (is_array($allowed_attributes)) {
273
-
274
-            // This actually doesn't do anything, since we went away from
275
-            // global attributes. It's possible that userland code uses
276
-            // it, but HTMLModuleManager doesn't!
277
-            foreach ($this->info_global_attr as $attr => $x) {
278
-                $keys = array($attr, "*@$attr", "*.$attr");
279
-                $delete = true;
280
-                foreach ($keys as $key) {
281
-                    if ($delete && isset($allowed_attributes[$key])) {
282
-                        $delete = false;
283
-                    }
284
-                    if (isset($allowed_attributes_mutable[$key])) {
285
-                        unset($allowed_attributes_mutable[$key]);
286
-                    }
287
-                }
288
-                if ($delete) unset($this->info_global_attr[$attr]);
289
-            }
290
-
291
-            foreach ($this->info as $tag => $info) {
292
-                foreach ($info->attr as $attr => $x) {
293
-                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
294
-                    $delete = true;
295
-                    foreach ($keys as $key) {
296
-                        if ($delete && isset($allowed_attributes[$key])) {
297
-                            $delete = false;
298
-                        }
299
-                        if (isset($allowed_attributes_mutable[$key])) {
300
-                            unset($allowed_attributes_mutable[$key]);
301
-                        }
302
-                    }
303
-                    if ($delete) {
304
-                        if ($this->info[$tag]->attr[$attr]->required) {
305
-                            trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
306
-                        }
307
-                        unset($this->info[$tag]->attr[$attr]);
308
-                    }
309
-                }
310
-            }
311
-            // emit errors
312
-            foreach ($allowed_attributes_mutable as $elattr => $d) {
313
-                $bits = preg_split('/[.@]/', $elattr, 2);
314
-                $c = count($bits);
315
-                switch ($c) {
316
-                    case 2:
317
-                        if ($bits[0] !== '*') {
318
-                            $element = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
319
-                            $attribute = htmlspecialchars($bits[1], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
320
-                            if (!isset($this->info[$element])) {
321
-                                trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support");
322
-                            } else {
323
-                                trigger_error("Attribute '$attribute' in element '$element' not supported $support",
324
-                                    E_USER_WARNING);
325
-                            }
326
-                            break;
327
-                        }
328
-                        // otherwise fall through
329
-                    case 1:
330
-                        $attribute = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
331
-                        trigger_error("Global attribute '$attribute' is not ".
332
-                            "supported in any elements $support",
333
-                            E_USER_WARNING);
334
-                        break;
335
-                }
336
-            }
337
-
338
-        }
339
-
340
-        // setup forbidden elements ---------------------------------------
341
-
342
-        $forbidden_elements   = $config->get('HTML.ForbiddenElements');
343
-        $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');
344
-
345
-        foreach ($this->info as $tag => $info) {
346
-            if (isset($forbidden_elements[$tag])) {
347
-                unset($this->info[$tag]);
348
-                continue;
349
-            }
350
-            foreach ($info->attr as $attr => $x) {
351
-                if (
352
-                    isset($forbidden_attributes["$tag@$attr"]) ||
353
-                    isset($forbidden_attributes["*@$attr"]) ||
354
-                    isset($forbidden_attributes[$attr])
355
-                ) {
356
-                    unset($this->info[$tag]->attr[$attr]);
357
-                    continue;
358
-                } // this segment might get removed eventually
359
-                elseif (isset($forbidden_attributes["$tag.$attr"])) {
360
-                    // $tag.$attr are not user supplied, so no worries!
361
-                    trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
362
-                }
363
-            }
364
-        }
365
-        foreach ($forbidden_attributes as $key => $v) {
366
-            if (strlen($key) < 2) continue;
367
-            if ($key[0] != '*') continue;
368
-            if ($key[1] == '.') {
369
-                trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
370
-            }
371
-        }
372
-
373
-        // setup injectors -----------------------------------------------------
374
-        foreach ($this->info_injector as $i => $injector) {
375
-            if ($injector->checkNeeded($config) !== false) {
376
-                // remove injector that does not have it's required
377
-                // elements/attributes present, and is thus not needed.
378
-                unset($this->info_injector[$i]);
379
-            }
380
-        }
381
-    }
382
-
383
-    /**
384
-     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
385
-     * separate lists for processing. Format is element[attr1|attr2],element2...
386
-     * @warning Although it's largely drawn from TinyMCE's implementation,
387
-     *      it is different, and you'll probably have to modify your lists
388
-     * @param $list String list to parse
389
-     * @param array($allowed_elements, $allowed_attributes)
390
-     * @todo Give this its own class, probably static interface
391
-     */
392
-    public function parseTinyMCEAllowedList($list) {
393
-
394
-        $list = str_replace(array(' ', "\t"), '', $list);
395
-
396
-        $elements = array();
397
-        $attributes = array();
398
-
399
-        $chunks = preg_split('/(,|[\n\r]+)/', $list);
400
-        foreach ($chunks as $chunk) {
401
-            if (empty($chunk)) continue;
402
-            // remove TinyMCE element control characters
403
-            if (!strpos($chunk, '[')) {
404
-                $element = $chunk;
405
-                $attr = false;
406
-            } else {
407
-                list($element, $attr) = explode('[', $chunk);
408
-            }
409
-            if ($element !== '*') $elements[$element] = true;
410
-            if (!$attr) continue;
411
-            $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
412
-            $attr = explode('|', $attr);
413
-            foreach ($attr as $key) {
414
-                $attributes["$element.$key"] = true;
415
-            }
416
-        }
417
-
418
-        return array($elements, $attributes);
419
-
420
-    }
29
+	// FULLY-PUBLIC VARIABLES ---------------------------------------------
30
+
31
+	/**
32
+	 * Associative array of element names to HTMLPurifier_ElementDef
33
+	 */
34
+	public $info = array();
35
+
36
+	/**
37
+	 * Associative array of global attribute name to attribute definition.
38
+	 */
39
+	public $info_global_attr = array();
40
+
41
+	/**
42
+	 * String name of parent element HTML will be going into.
43
+	 */
44
+	public $info_parent = 'div';
45
+
46
+	/**
47
+	 * Definition for parent element, allows parent element to be a
48
+	 * tag that's not allowed inside the HTML fragment.
49
+	 */
50
+	public $info_parent_def;
51
+
52
+	/**
53
+	 * String name of element used to wrap inline elements in block context
54
+	 * @note This is rarely used except for BLOCKQUOTEs in strict mode
55
+	 */
56
+	public $info_block_wrapper = 'p';
57
+
58
+	/**
59
+	 * Associative array of deprecated tag name to HTMLPurifier_TagTransform
60
+	 */
61
+	public $info_tag_transform = array();
62
+
63
+	/**
64
+	 * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
65
+	 */
66
+	public $info_attr_transform_pre = array();
67
+
68
+	/**
69
+	 * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
70
+	 */
71
+	public $info_attr_transform_post = array();
72
+
73
+	/**
74
+	 * Nested lookup array of content set name (Block, Inline) to
75
+	 * element name to whether or not it belongs in that content set.
76
+	 */
77
+	public $info_content_sets = array();
78
+
79
+	/**
80
+	 * Indexed list of HTMLPurifier_Injector to be used.
81
+	 */
82
+	public $info_injector = array();
83
+
84
+	/**
85
+	 * Doctype object
86
+	 */
87
+	public $doctype;
88
+
89
+
90
+
91
+	// RAW CUSTOMIZATION STUFF --------------------------------------------
92
+
93
+	/**
94
+	 * Adds a custom attribute to a pre-existing element
95
+	 * @note This is strictly convenience, and does not have a corresponding
96
+	 *       method in HTMLPurifier_HTMLModule
97
+	 * @param $element_name String element name to add attribute to
98
+	 * @param $attr_name String name of attribute
99
+	 * @param $def Attribute definition, can be string or object, see
100
+	 *             HTMLPurifier_AttrTypes for details
101
+	 */
102
+	public function addAttribute($element_name, $attr_name, $def) {
103
+		$module = $this->getAnonymousModule();
104
+		if (!isset($module->info[$element_name])) {
105
+			$element = $module->addBlankElement($element_name);
106
+		} else {
107
+			$element = $module->info[$element_name];
108
+		}
109
+		$element->attr[$attr_name] = $def;
110
+	}
111
+
112
+	/**
113
+	 * Adds a custom element to your HTML definition
114
+	 * @note See HTMLPurifier_HTMLModule::addElement for detailed
115
+	 *       parameter and return value descriptions.
116
+	 */
117
+	public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) {
118
+		$module = $this->getAnonymousModule();
119
+		// assume that if the user is calling this, the element
120
+		// is safe. This may not be a good idea
121
+		$element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
122
+		return $element;
123
+	}
124
+
125
+	/**
126
+	 * Adds a blank element to your HTML definition, for overriding
127
+	 * existing behavior
128
+	 * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
129
+	 *       parameter and return value descriptions.
130
+	 */
131
+	public function addBlankElement($element_name) {
132
+		$module  = $this->getAnonymousModule();
133
+		$element = $module->addBlankElement($element_name);
134
+		return $element;
135
+	}
136
+
137
+	/**
138
+	 * Retrieves a reference to the anonymous module, so you can
139
+	 * bust out advanced features without having to make your own
140
+	 * module.
141
+	 */
142
+	public function getAnonymousModule() {
143
+		if (!$this->_anonModule) {
144
+			$this->_anonModule = new HTMLPurifier_HTMLModule();
145
+			$this->_anonModule->name = 'Anonymous';
146
+		}
147
+		return $this->_anonModule;
148
+	}
149
+
150
+	private $_anonModule = null;
151
+
152
+
153
+	// PUBLIC BUT INTERNAL VARIABLES --------------------------------------
154
+
155
+	public $type = 'HTML';
156
+	public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
157
+
158
+	/**
159
+	 * Performs low-cost, preliminary initialization.
160
+	 */
161
+	public function __construct() {
162
+		$this->manager = new HTMLPurifier_HTMLModuleManager();
163
+	}
164
+
165
+	protected function doSetup($config) {
166
+		$this->processModules($config);
167
+		$this->setupConfigStuff($config);
168
+		unset($this->manager);
169
+
170
+		// cleanup some of the element definitions
171
+		foreach ($this->info as $k => $v) {
172
+			unset($this->info[$k]->content_model);
173
+			unset($this->info[$k]->content_model_type);
174
+		}
175
+	}
176
+
177
+	/**
178
+	 * Extract out the information from the manager
179
+	 */
180
+	protected function processModules($config) {
181
+
182
+		if ($this->_anonModule) {
183
+			// for user specific changes
184
+			// this is late-loaded so we don't have to deal with PHP4
185
+			// reference wonky-ness
186
+			$this->manager->addModule($this->_anonModule);
187
+			unset($this->_anonModule);
188
+		}
189
+
190
+		$this->manager->setup($config);
191
+		$this->doctype = $this->manager->doctype;
192
+
193
+		foreach ($this->manager->modules as $module) {
194
+			foreach($module->info_tag_transform as $k => $v) {
195
+				if ($v === false) unset($this->info_tag_transform[$k]);
196
+				else $this->info_tag_transform[$k] = $v;
197
+			}
198
+			foreach($module->info_attr_transform_pre as $k => $v) {
199
+				if ($v === false) unset($this->info_attr_transform_pre[$k]);
200
+				else $this->info_attr_transform_pre[$k] = $v;
201
+			}
202
+			foreach($module->info_attr_transform_post as $k => $v) {
203
+				if ($v === false) unset($this->info_attr_transform_post[$k]);
204
+				else $this->info_attr_transform_post[$k] = $v;
205
+			}
206
+			foreach ($module->info_injector as $k => $v) {
207
+				if ($v === false) unset($this->info_injector[$k]);
208
+				else $this->info_injector[$k] = $v;
209
+			}
210
+		}
211
+
212
+		$this->info = $this->manager->getElements();
213
+		$this->info_content_sets = $this->manager->contentSets->lookup;
214
+
215
+	}
216
+
217
+	/**
218
+	 * Sets up stuff based on config. We need a better way of doing this.
219
+	 */
220
+	protected function setupConfigStuff($config) {
221
+
222
+		$block_wrapper = $config->get('HTML.BlockWrapper');
223
+		if (isset($this->info_content_sets['Block'][$block_wrapper])) {
224
+			$this->info_block_wrapper = $block_wrapper;
225
+		} else {
226
+			trigger_error('Cannot use non-block element as block wrapper',
227
+				E_USER_ERROR);
228
+		}
229
+
230
+		$parent = $config->get('HTML.Parent');
231
+		$def = $this->manager->getElement($parent, true);
232
+		if ($def) {
233
+			$this->info_parent = $parent;
234
+			$this->info_parent_def = $def;
235
+		} else {
236
+			trigger_error('Cannot use unrecognized element as parent',
237
+				E_USER_ERROR);
238
+			$this->info_parent_def = $this->manager->getElement($this->info_parent, true);
239
+		}
240
+
241
+		// support template text
242
+		$support = "(for information on implementing this, see the ".
243
+				   "support forums) ";
244
+
245
+		// setup allowed elements -----------------------------------------
246
+
247
+		$allowed_elements = $config->get('HTML.AllowedElements');
248
+		$allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early
249
+
250
+		if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
251
+			$allowed = $config->get('HTML.Allowed');
252
+			if (is_string($allowed)) {
253
+				list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
254
+			}
255
+		}
256
+
257
+		if (is_array($allowed_elements)) {
258
+			foreach ($this->info as $name => $d) {
259
+				if(!isset($allowed_elements[$name])) unset($this->info[$name]);
260
+				unset($allowed_elements[$name]);
261
+			}
262
+			// emit errors
263
+			foreach ($allowed_elements as $element => $d) {
264
+				$element = htmlspecialchars($element, ENT_COMPAT | ENT_HTML401, 'UTF-8', false); // PHP doesn't escape errors, be careful!
265
+				trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
266
+			}
267
+		}
268
+
269
+		// setup allowed attributes ---------------------------------------
270
+
271
+		$allowed_attributes_mutable = $allowed_attributes; // by copy!
272
+		if (is_array($allowed_attributes)) {
273
+
274
+			// This actually doesn't do anything, since we went away from
275
+			// global attributes. It's possible that userland code uses
276
+			// it, but HTMLModuleManager doesn't!
277
+			foreach ($this->info_global_attr as $attr => $x) {
278
+				$keys = array($attr, "*@$attr", "*.$attr");
279
+				$delete = true;
280
+				foreach ($keys as $key) {
281
+					if ($delete && isset($allowed_attributes[$key])) {
282
+						$delete = false;
283
+					}
284
+					if (isset($allowed_attributes_mutable[$key])) {
285
+						unset($allowed_attributes_mutable[$key]);
286
+					}
287
+				}
288
+				if ($delete) unset($this->info_global_attr[$attr]);
289
+			}
290
+
291
+			foreach ($this->info as $tag => $info) {
292
+				foreach ($info->attr as $attr => $x) {
293
+					$keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
294
+					$delete = true;
295
+					foreach ($keys as $key) {
296
+						if ($delete && isset($allowed_attributes[$key])) {
297
+							$delete = false;
298
+						}
299
+						if (isset($allowed_attributes_mutable[$key])) {
300
+							unset($allowed_attributes_mutable[$key]);
301
+						}
302
+					}
303
+					if ($delete) {
304
+						if ($this->info[$tag]->attr[$attr]->required) {
305
+							trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
306
+						}
307
+						unset($this->info[$tag]->attr[$attr]);
308
+					}
309
+				}
310
+			}
311
+			// emit errors
312
+			foreach ($allowed_attributes_mutable as $elattr => $d) {
313
+				$bits = preg_split('/[.@]/', $elattr, 2);
314
+				$c = count($bits);
315
+				switch ($c) {
316
+					case 2:
317
+						if ($bits[0] !== '*') {
318
+							$element = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
319
+							$attribute = htmlspecialchars($bits[1], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
320
+							if (!isset($this->info[$element])) {
321
+								trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support");
322
+							} else {
323
+								trigger_error("Attribute '$attribute' in element '$element' not supported $support",
324
+									E_USER_WARNING);
325
+							}
326
+							break;
327
+						}
328
+						// otherwise fall through
329
+					case 1:
330
+						$attribute = htmlspecialchars($bits[0], ENT_COMPAT | ENT_HTML401, 'UTF-8', false);
331
+						trigger_error("Global attribute '$attribute' is not ".
332
+							"supported in any elements $support",
333
+							E_USER_WARNING);
334
+						break;
335
+				}
336
+			}
337
+
338
+		}
339
+
340
+		// setup forbidden elements ---------------------------------------
341
+
342
+		$forbidden_elements   = $config->get('HTML.ForbiddenElements');
343
+		$forbidden_attributes = $config->get('HTML.ForbiddenAttributes');
344
+
345
+		foreach ($this->info as $tag => $info) {
346
+			if (isset($forbidden_elements[$tag])) {
347
+				unset($this->info[$tag]);
348
+				continue;
349
+			}
350
+			foreach ($info->attr as $attr => $x) {
351
+				if (
352
+					isset($forbidden_attributes["$tag@$attr"]) ||
353
+					isset($forbidden_attributes["*@$attr"]) ||
354
+					isset($forbidden_attributes[$attr])
355
+				) {
356
+					unset($this->info[$tag]->attr[$attr]);
357
+					continue;
358
+				} // this segment might get removed eventually
359
+				elseif (isset($forbidden_attributes["$tag.$attr"])) {
360
+					// $tag.$attr are not user supplied, so no worries!
361
+					trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
362
+				}
363
+			}
364
+		}
365
+		foreach ($forbidden_attributes as $key => $v) {
366
+			if (strlen($key) < 2) continue;
367
+			if ($key[0] != '*') continue;
368
+			if ($key[1] == '.') {
369
+				trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
370
+			}
371
+		}
372
+
373
+		// setup injectors -----------------------------------------------------
374
+		foreach ($this->info_injector as $i => $injector) {
375
+			if ($injector->checkNeeded($config) !== false) {
376
+				// remove injector that does not have it's required
377
+				// elements/attributes present, and is thus not needed.
378
+				unset($this->info_injector[$i]);
379
+			}
380
+		}
381
+	}
382
+
383
+	/**
384
+	 * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
385
+	 * separate lists for processing. Format is element[attr1|attr2],element2...
386
+	 * @warning Although it's largely drawn from TinyMCE's implementation,
387
+	 *      it is different, and you'll probably have to modify your lists
388
+	 * @param $list String list to parse
389
+	 * @param array($allowed_elements, $allowed_attributes)
390
+	 * @todo Give this its own class, probably static interface
391
+	 */
392
+	public function parseTinyMCEAllowedList($list) {
393
+
394
+		$list = str_replace(array(' ', "\t"), '', $list);
395
+
396
+		$elements = array();
397
+		$attributes = array();
398
+
399
+		$chunks = preg_split('/(,|[\n\r]+)/', $list);
400
+		foreach ($chunks as $chunk) {
401
+			if (empty($chunk)) continue;
402
+			// remove TinyMCE element control characters
403
+			if (!strpos($chunk, '[')) {
404
+				$element = $chunk;
405
+				$attr = false;
406
+			} else {
407
+				list($element, $attr) = explode('[', $chunk);
408
+			}
409
+			if ($element !== '*') $elements[$element] = true;
410
+			if (!$attr) continue;
411
+			$attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
412
+			$attr = explode('|', $attr);
413
+			foreach ($attr as $key) {
414
+				$attributes["$element.$key"] = true;
415
+			}
416
+		}
417
+
418
+		return array($elements, $attributes);
419
+
420
+	}
421 421
 
422 422
 
423 423
 }
Please login to merge, or discard this patch.
Braces   +41 added lines, -15 removed lines patch added patch discarded remove patch
@@ -192,20 +192,32 @@  discard block
 block discarded – undo
192 192
 
193 193
         foreach ($this->manager->modules as $module) {
194 194
             foreach($module->info_tag_transform as $k => $v) {
195
-                if ($v === false) unset($this->info_tag_transform[$k]);
196
-                else $this->info_tag_transform[$k] = $v;
195
+                if ($v === false) {
196
+                	unset($this->info_tag_transform[$k]);
197
+                } else {
198
+                	$this->info_tag_transform[$k] = $v;
199
+                }
197 200
             }
198 201
             foreach($module->info_attr_transform_pre as $k => $v) {
199
-                if ($v === false) unset($this->info_attr_transform_pre[$k]);
200
-                else $this->info_attr_transform_pre[$k] = $v;
202
+                if ($v === false) {
203
+                	unset($this->info_attr_transform_pre[$k]);
204
+                } else {
205
+                	$this->info_attr_transform_pre[$k] = $v;
206
+                }
201 207
             }
202 208
             foreach($module->info_attr_transform_post as $k => $v) {
203
-                if ($v === false) unset($this->info_attr_transform_post[$k]);
204
-                else $this->info_attr_transform_post[$k] = $v;
209
+                if ($v === false) {
210
+                	unset($this->info_attr_transform_post[$k]);
211
+                } else {
212
+                	$this->info_attr_transform_post[$k] = $v;
213
+                }
205 214
             }
206 215
             foreach ($module->info_injector as $k => $v) {
207
-                if ($v === false) unset($this->info_injector[$k]);
208
-                else $this->info_injector[$k] = $v;
216
+                if ($v === false) {
217
+                	unset($this->info_injector[$k]);
218
+                } else {
219
+                	$this->info_injector[$k] = $v;
220
+                }
209 221
             }
210 222
         }
211 223
 
@@ -256,7 +268,9 @@  discard block
 block discarded – undo
256 268
 
257 269
         if (is_array($allowed_elements)) {
258 270
             foreach ($this->info as $name => $d) {
259
-                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
271
+                if(!isset($allowed_elements[$name])) {
272
+                	unset($this->info[$name]);
273
+                }
260 274
                 unset($allowed_elements[$name]);
261 275
             }
262 276
             // emit errors
@@ -285,7 +299,9 @@  discard block
 block discarded – undo
285 299
                         unset($allowed_attributes_mutable[$key]);
286 300
                     }
287 301
                 }
288
-                if ($delete) unset($this->info_global_attr[$attr]);
302
+                if ($delete) {
303
+                	unset($this->info_global_attr[$attr]);
304
+                }
289 305
             }
290 306
 
291 307
             foreach ($this->info as $tag => $info) {
@@ -363,8 +379,12 @@  discard block
 block discarded – undo
363 379
             }
364 380
         }
365 381
         foreach ($forbidden_attributes as $key => $v) {
366
-            if (strlen($key) < 2) continue;
367
-            if ($key[0] != '*') continue;
382
+            if (strlen($key) < 2) {
383
+            	continue;
384
+            }
385
+            if ($key[0] != '*') {
386
+            	continue;
387
+            }
368 388
             if ($key[1] == '.') {
369 389
                 trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
370 390
             }
@@ -398,7 +418,9 @@  discard block
 block discarded – undo
398 418
 
399 419
         $chunks = preg_split('/(,|[\n\r]+)/', $list);
400 420
         foreach ($chunks as $chunk) {
401
-            if (empty($chunk)) continue;
421
+            if (empty($chunk)) {
422
+            	continue;
423
+            }
402 424
             // remove TinyMCE element control characters
403 425
             if (!strpos($chunk, '[')) {
404 426
                 $element = $chunk;
@@ -406,8 +428,12 @@  discard block
 block discarded – undo
406 428
             } else {
407 429
                 list($element, $attr) = explode('[', $chunk);
408 430
             }
409
-            if ($element !== '*') $elements[$element] = true;
410
-            if (!$attr) continue;
431
+            if ($element !== '*') {
432
+            	$elements[$element] = true;
433
+            }
434
+            if (!$attr) {
435
+            	continue;
436
+            }
411 437
             $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
412 438
             $attr = explode('|', $attr);
413 439
             foreach ($attr as $key) {
Please login to merge, or discard this patch.
Spacing   +4 added lines, -4 removed lines patch added patch discarded remove patch
@@ -191,15 +191,15 @@  discard block
 block discarded – undo
191 191
         $this->doctype = $this->manager->doctype;
192 192
 
193 193
         foreach ($this->manager->modules as $module) {
194
-            foreach($module->info_tag_transform as $k => $v) {
194
+            foreach ($module->info_tag_transform as $k => $v) {
195 195
                 if ($v === false) unset($this->info_tag_transform[$k]);
196 196
                 else $this->info_tag_transform[$k] = $v;
197 197
             }
198
-            foreach($module->info_attr_transform_pre as $k => $v) {
198
+            foreach ($module->info_attr_transform_pre as $k => $v) {
199 199
                 if ($v === false) unset($this->info_attr_transform_pre[$k]);
200 200
                 else $this->info_attr_transform_pre[$k] = $v;
201 201
             }
202
-            foreach($module->info_attr_transform_post as $k => $v) {
202
+            foreach ($module->info_attr_transform_post as $k => $v) {
203 203
                 if ($v === false) unset($this->info_attr_transform_post[$k]);
204 204
                 else $this->info_attr_transform_post[$k] = $v;
205 205
             }
@@ -256,7 +256,7 @@  discard block
 block discarded – undo
256 256
 
257 257
         if (is_array($allowed_elements)) {
258 258
             foreach ($this->info as $name => $d) {
259
-                if(!isset($allowed_elements[$name])) unset($this->info[$name]);
259
+                if (!isset($allowed_elements[$name])) unset($this->info[$name]);
260 260
                 unset($allowed_elements[$name]);
261 261
             }
262 262
             // emit errors
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/HTMLModuleManager.php 4 patches
Doc Comments   +2 added lines, -1 removed lines patch added patch discarded remove patch
@@ -179,6 +179,7 @@  discard block
 block discarded – undo
179 179
     /**
180 180
      * Adds a module to the current doctype by first registering it,
181 181
      * and then tacking it on to the active doctype
182
+     * @param HTMLPurifier_HTMLModule $module
182 183
      */
183 184
     public function addModule($module) {
184 185
         $this->registerModule($module);
@@ -325,7 +326,7 @@  discard block
 block discarded – undo
325 326
     /**
326 327
      * Retrieves a single merged element definition
327 328
      * @param $name Name of element
328
-     * @param $trusted Boolean trusted overriding parameter: set to true
329
+     * @param boolean $trusted Boolean trusted overriding parameter: set to true
329 330
      *                 if you want the full version of an element
330 331
      * @return Merged HTMLPurifier_ElementDef
331 332
      * @note You may notice that modules are getting iterated over twice (once
Please login to merge, or discard this patch.
Indentation   +406 added lines, -406 removed lines patch added patch discarded remove patch
@@ -3,412 +3,412 @@
 block discarded – undo
3 3
 class HTMLPurifier_HTMLModuleManager
4 4
 {
5 5
 
6
-    /**
7
-     * Instance of HTMLPurifier_DoctypeRegistry
8
-     */
9
-    public $doctypes;
10
-
11
-    /**
12
-     * Instance of current doctype
13
-     */
14
-    public $doctype;
15
-
16
-    /**
17
-     * Instance of HTMLPurifier_AttrTypes
18
-     */
19
-    public $attrTypes;
20
-
21
-    /**
22
-     * Active instances of modules for the specified doctype are
23
-     * indexed, by name, in this array.
24
-     */
25
-    public $modules = array();
26
-
27
-    /**
28
-     * Array of recognized HTMLPurifier_Module instances, indexed by
29
-     * module's class name. This array is usually lazy loaded, but a
30
-     * user can overload a module by pre-emptively registering it.
31
-     */
32
-    public $registeredModules = array();
33
-
34
-    /**
35
-     * List of extra modules that were added by the user using addModule().
36
-     * These get unconditionally merged into the current doctype, whatever
37
-     * it may be.
38
-     */
39
-    public $userModules = array();
40
-
41
-    /**
42
-     * Associative array of element name to list of modules that have
43
-     * definitions for the element; this array is dynamically filled.
44
-     */
45
-    public $elementLookup = array();
46
-
47
-    /** List of prefixes we should use for registering small names */
48
-    public $prefixes = array('HTMLPurifier_HTMLModule_');
49
-
50
-    public $contentSets;     /**< Instance of HTMLPurifier_ContentSets */
51
-    public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
52
-
53
-    /** If set to true, unsafe elements and attributes will be allowed */
54
-    public $trusted = false;
55
-
56
-    public function __construct() {
57
-
58
-        // editable internal objects
59
-        $this->attrTypes = new HTMLPurifier_AttrTypes();
60
-        $this->doctypes  = new HTMLPurifier_DoctypeRegistry();
61
-
62
-        // setup basic modules
63
-        $common = array(
64
-            'CommonAttributes', 'Text', 'Hypertext', 'List',
65
-            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
66
-            'StyleAttribute',
67
-            // Unsafe:
68
-            'Scripting', 'Object', 'Forms',
69
-            // Sorta legacy, but present in strict:
70
-            'Name',
71
-        );
72
-        $transitional = array('Legacy', 'Target', 'Iframe');
73
-        $xml = array('XMLCommonAttributes');
74
-        $non_xml = array('NonXMLCommonAttributes');
75
-
76
-        // setup basic doctypes
77
-        $this->doctypes->register(
78
-            'HTML 4.01 Transitional', false,
79
-            array_merge($common, $transitional, $non_xml),
80
-            array('Tidy_Transitional', 'Tidy_Proprietary'),
81
-            array(),
82
-            '-//W3C//DTD HTML 4.01 Transitional//EN',
83
-            'http://www.w3.org/TR/html4/loose.dtd'
84
-        );
85
-
86
-        $this->doctypes->register(
87
-            'HTML 4.01 Strict', false,
88
-            array_merge($common, $non_xml),
89
-            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
90
-            array(),
91
-            '-//W3C//DTD HTML 4.01//EN',
92
-            'http://www.w3.org/TR/html4/strict.dtd'
93
-        );
94
-
95
-        $this->doctypes->register(
96
-            'XHTML 1.0 Transitional', true,
97
-            array_merge($common, $transitional, $xml, $non_xml),
98
-            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
99
-            array(),
100
-            '-//W3C//DTD XHTML 1.0 Transitional//EN',
101
-            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
102
-        );
103
-
104
-        $this->doctypes->register(
105
-            'XHTML 1.0 Strict', true,
106
-            array_merge($common, $xml, $non_xml),
107
-            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
108
-            array(),
109
-            '-//W3C//DTD XHTML 1.0 Strict//EN',
110
-            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
111
-        );
112
-
113
-        $this->doctypes->register(
114
-            'XHTML 1.1', true,
115
-            // Iframe is a real XHTML 1.1 module, despite being
116
-            // "transitional"!
117
-            array_merge($common, $xml, array('Ruby', 'Iframe')),
118
-            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
119
-            array(),
120
-            '-//W3C//DTD XHTML 1.1//EN',
121
-            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
122
-        );
123
-
124
-    }
125
-
126
-    /**
127
-     * Registers a module to the recognized module list, useful for
128
-     * overloading pre-existing modules.
129
-     * @param $module Mixed: string module name, with or without
130
-     *                HTMLPurifier_HTMLModule prefix, or instance of
131
-     *                subclass of HTMLPurifier_HTMLModule.
132
-     * @param $overload Boolean whether or not to overload previous modules.
133
-     *                  If this is not set, and you do overload a module,
134
-     *                  HTML Purifier will complain with a warning.
135
-     * @note This function will not call autoload, you must instantiate
136
-     *       (and thus invoke) autoload outside the method.
137
-     * @note If a string is passed as a module name, different variants
138
-     *       will be tested in this order:
139
-     *          - Check for HTMLPurifier_HTMLModule_$name
140
-     *          - Check all prefixes with $name in order they were added
141
-     *          - Check for literal object name
142
-     *          - Throw fatal error
143
-     *       If your object name collides with an internal class, specify
144
-     *       your module manually. All modules must have been included
145
-     *       externally: registerModule will not perform inclusions for you!
146
-     */
147
-    public function registerModule($module, $overload = false) {
148
-        if (is_string($module)) {
149
-            // attempt to load the module
150
-            $original_module = $module;
151
-            $ok = false;
152
-            foreach ($this->prefixes as $prefix) {
153
-                $module = $prefix . $original_module;
154
-                if (class_exists($module)) {
155
-                    $ok = true;
156
-                    break;
157
-                }
158
-            }
159
-            if (!$ok) {
160
-                $module = $original_module;
161
-                if (!class_exists($module)) {
162
-                    trigger_error($original_module . ' module does not exist',
163
-                        E_USER_ERROR);
164
-                    return;
165
-                }
166
-            }
167
-            $module = new $module();
168
-        }
169
-        if (empty($module->name)) {
170
-            trigger_error('Module instance of ' . get_class($module) . ' must have name');
171
-            return;
172
-        }
173
-        if (!$overload && isset($this->registeredModules[$module->name])) {
174
-            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
175
-        }
176
-        $this->registeredModules[$module->name] = $module;
177
-    }
178
-
179
-    /**
180
-     * Adds a module to the current doctype by first registering it,
181
-     * and then tacking it on to the active doctype
182
-     */
183
-    public function addModule($module) {
184
-        $this->registerModule($module);
185
-        if (is_object($module)) $module = $module->name;
186
-        $this->userModules[] = $module;
187
-    }
188
-
189
-    /**
190
-     * Adds a class prefix that registerModule() will use to resolve a
191
-     * string name to a concrete class
192
-     */
193
-    public function addPrefix($prefix) {
194
-        $this->prefixes[] = $prefix;
195
-    }
196
-
197
-    /**
198
-     * Performs processing on modules, after being called you may
199
-     * use getElement() and getElements()
200
-     * @param $config Instance of HTMLPurifier_Config
201
-     */
202
-    public function setup($config) {
203
-
204
-        $this->trusted = $config->get('HTML.Trusted');
205
-
206
-        // generate
207
-        $this->doctype = $this->doctypes->make($config);
208
-        $modules = $this->doctype->modules;
209
-
210
-        // take out the default modules that aren't allowed
211
-        $lookup = $config->get('HTML.AllowedModules');
212
-        $special_cases = $config->get('HTML.CoreModules');
213
-
214
-        if (is_array($lookup)) {
215
-            foreach ($modules as $k => $m) {
216
-                if (isset($special_cases[$m])) continue;
217
-                if (!isset($lookup[$m])) unset($modules[$k]);
218
-            }
219
-        }
220
-
221
-        // custom modules
222
-        if ($config->get('HTML.Proprietary')) {
223
-            $modules[] = 'Proprietary';
224
-        }
225
-        if ($config->get('HTML.SafeObject')) {
226
-            $modules[] = 'SafeObject';
227
-        }
228
-        if ($config->get('HTML.SafeEmbed')) {
229
-            $modules[] = 'SafeEmbed';
230
-        }
231
-        if ($config->get('HTML.Nofollow')) {
232
-            $modules[] = 'Nofollow';
233
-        }
234
-        if ($config->get('HTML.TargetBlank')) {
235
-            $modules[] = 'TargetBlank';
236
-        }
237
-
238
-        // merge in custom modules
239
-        $modules = array_merge($modules, $this->userModules);
240
-
241
-        foreach ($modules as $module) {
242
-            $this->processModule($module);
243
-            $this->modules[$module]->setup($config);
244
-        }
245
-
246
-        foreach ($this->doctype->tidyModules as $module) {
247
-            $this->processModule($module);
248
-            $this->modules[$module]->setup($config);
249
-        }
250
-
251
-        // prepare any injectors
252
-        foreach ($this->modules as $module) {
253
-            $n = array();
254
-            foreach ($module->info_injector as $i => $injector) {
255
-                if (!is_object($injector)) {
256
-                    $class = "HTMLPurifier_Injector_$injector";
257
-                    $injector = new $class;
258
-                }
259
-                $n[$injector->name] = $injector;
260
-            }
261
-            $module->info_injector = $n;
262
-        }
263
-
264
-        // setup lookup table based on all valid modules
265
-        foreach ($this->modules as $module) {
266
-            foreach ($module->info as $name => $def) {
267
-                if (!isset($this->elementLookup[$name])) {
268
-                    $this->elementLookup[$name] = array();
269
-                }
270
-                $this->elementLookup[$name][] = $module->name;
271
-            }
272
-        }
273
-
274
-        // note the different choice
275
-        $this->contentSets = new HTMLPurifier_ContentSets(
276
-            // content set assembly deals with all possible modules,
277
-            // not just ones deemed to be "safe"
278
-            $this->modules
279
-        );
280
-        $this->attrCollections = new HTMLPurifier_AttrCollections(
281
-            $this->attrTypes,
282
-            // there is no way to directly disable a global attribute,
283
-            // but using AllowedAttributes or simply not including
284
-            // the module in your custom doctype should be sufficient
285
-            $this->modules
286
-        );
287
-    }
288
-
289
-    /**
290
-     * Takes a module and adds it to the active module collection,
291
-     * registering it if necessary.
292
-     */
293
-    public function processModule($module) {
294
-        if (!isset($this->registeredModules[$module]) || is_object($module)) {
295
-            $this->registerModule($module);
296
-        }
297
-        $this->modules[$module] = $this->registeredModules[$module];
298
-    }
299
-
300
-    /**
301
-     * Retrieves merged element definitions.
302
-     * @return Array of HTMLPurifier_ElementDef
303
-     */
304
-    public function getElements() {
305
-
306
-        $elements = array();
307
-        foreach ($this->modules as $module) {
308
-            if (!$this->trusted && !$module->safe) continue;
309
-            foreach ($module->info as $name => $v) {
310
-                if (isset($elements[$name])) continue;
311
-                $elements[$name] = $this->getElement($name);
312
-            }
313
-        }
314
-
315
-        // remove dud elements, this happens when an element that
316
-        // appeared to be safe actually wasn't
317
-        foreach ($elements as $n => $v) {
318
-            if ($v === false) unset($elements[$n]);
319
-        }
320
-
321
-        return $elements;
322
-
323
-    }
324
-
325
-    /**
326
-     * Retrieves a single merged element definition
327
-     * @param $name Name of element
328
-     * @param $trusted Boolean trusted overriding parameter: set to true
329
-     *                 if you want the full version of an element
330
-     * @return Merged HTMLPurifier_ElementDef
331
-     * @note You may notice that modules are getting iterated over twice (once
332
-     *       in getElements() and once here). This
333
-     *       is because
334
-     */
335
-    public function getElement($name, $trusted = null) {
336
-
337
-        if (!isset($this->elementLookup[$name])) {
338
-            return false;
339
-        }
340
-
341
-        // setup global state variables
342
-        $def = false;
343
-        if ($trusted === null) $trusted = $this->trusted;
344
-
345
-        // iterate through each module that has registered itself to this
346
-        // element
347
-        foreach($this->elementLookup[$name] as $module_name) {
348
-
349
-            $module = $this->modules[$module_name];
350
-
351
-            // refuse to create/merge from a module that is deemed unsafe--
352
-            // pretend the module doesn't exist--when trusted mode is not on.
353
-            if (!$trusted && !$module->safe) {
354
-                continue;
355
-            }
356
-
357
-            // clone is used because, ideally speaking, the original
358
-            // definition should not be modified. Usually, this will
359
-            // make no difference, but for consistency's sake
360
-            $new_def = clone $module->info[$name];
361
-
362
-            if (!$def && $new_def->standalone) {
363
-                $def = $new_def;
364
-            } elseif ($def) {
365
-                // This will occur even if $new_def is standalone. In practice,
366
-                // this will usually result in a full replacement.
367
-                $def->mergeIn($new_def);
368
-            } else {
369
-                // :TODO:
370
-                // non-standalone definitions that don't have a standalone
371
-                // to merge into could be deferred to the end
372
-                // HOWEVER, it is perfectly valid for a non-standalone
373
-                // definition to lack a standalone definition, even
374
-                // after all processing: this allows us to safely
375
-                // specify extra attributes for elements that may not be
376
-                // enabled all in one place.  In particular, this might
377
-                // be the case for trusted elements.  WARNING: care must
378
-                // be taken that the /extra/ definitions are all safe.
379
-                continue;
380
-            }
381
-
382
-            // attribute value expansions
383
-            $this->attrCollections->performInclusions($def->attr);
384
-            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
385
-
386
-            // descendants_are_inline, for ChildDef_Chameleon
387
-            if (is_string($def->content_model) &&
388
-                strpos($def->content_model, 'Inline') !== false) {
389
-                if ($name != 'del' && $name != 'ins') {
390
-                    // this is for you, ins/del
391
-                    $def->descendants_are_inline = true;
392
-                }
393
-            }
394
-
395
-            $this->contentSets->generateChildDef($def, $module);
396
-        }
397
-
398
-        // This can occur if there is a blank definition, but no base to
399
-        // mix it in with
400
-        if (!$def) return false;
401
-
402
-        // add information on required attributes
403
-        foreach ($def->attr as $attr_name => $attr_def) {
404
-            if ($attr_def->required) {
405
-                $def->required_attr[] = $attr_name;
406
-            }
407
-        }
408
-
409
-        return $def;
410
-
411
-    }
6
+	/**
7
+	 * Instance of HTMLPurifier_DoctypeRegistry
8
+	 */
9
+	public $doctypes;
10
+
11
+	/**
12
+	 * Instance of current doctype
13
+	 */
14
+	public $doctype;
15
+
16
+	/**
17
+	 * Instance of HTMLPurifier_AttrTypes
18
+	 */
19
+	public $attrTypes;
20
+
21
+	/**
22
+	 * Active instances of modules for the specified doctype are
23
+	 * indexed, by name, in this array.
24
+	 */
25
+	public $modules = array();
26
+
27
+	/**
28
+	 * Array of recognized HTMLPurifier_Module instances, indexed by
29
+	 * module's class name. This array is usually lazy loaded, but a
30
+	 * user can overload a module by pre-emptively registering it.
31
+	 */
32
+	public $registeredModules = array();
33
+
34
+	/**
35
+	 * List of extra modules that were added by the user using addModule().
36
+	 * These get unconditionally merged into the current doctype, whatever
37
+	 * it may be.
38
+	 */
39
+	public $userModules = array();
40
+
41
+	/**
42
+	 * Associative array of element name to list of modules that have
43
+	 * definitions for the element; this array is dynamically filled.
44
+	 */
45
+	public $elementLookup = array();
46
+
47
+	/** List of prefixes we should use for registering small names */
48
+	public $prefixes = array('HTMLPurifier_HTMLModule_');
49
+
50
+	public $contentSets;     /**< Instance of HTMLPurifier_ContentSets */
51
+	public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
52
+
53
+	/** If set to true, unsafe elements and attributes will be allowed */
54
+	public $trusted = false;
55
+
56
+	public function __construct() {
57
+
58
+		// editable internal objects
59
+		$this->attrTypes = new HTMLPurifier_AttrTypes();
60
+		$this->doctypes  = new HTMLPurifier_DoctypeRegistry();
61
+
62
+		// setup basic modules
63
+		$common = array(
64
+			'CommonAttributes', 'Text', 'Hypertext', 'List',
65
+			'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
66
+			'StyleAttribute',
67
+			// Unsafe:
68
+			'Scripting', 'Object', 'Forms',
69
+			// Sorta legacy, but present in strict:
70
+			'Name',
71
+		);
72
+		$transitional = array('Legacy', 'Target', 'Iframe');
73
+		$xml = array('XMLCommonAttributes');
74
+		$non_xml = array('NonXMLCommonAttributes');
75
+
76
+		// setup basic doctypes
77
+		$this->doctypes->register(
78
+			'HTML 4.01 Transitional', false,
79
+			array_merge($common, $transitional, $non_xml),
80
+			array('Tidy_Transitional', 'Tidy_Proprietary'),
81
+			array(),
82
+			'-//W3C//DTD HTML 4.01 Transitional//EN',
83
+			'http://www.w3.org/TR/html4/loose.dtd'
84
+		);
85
+
86
+		$this->doctypes->register(
87
+			'HTML 4.01 Strict', false,
88
+			array_merge($common, $non_xml),
89
+			array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
90
+			array(),
91
+			'-//W3C//DTD HTML 4.01//EN',
92
+			'http://www.w3.org/TR/html4/strict.dtd'
93
+		);
94
+
95
+		$this->doctypes->register(
96
+			'XHTML 1.0 Transitional', true,
97
+			array_merge($common, $transitional, $xml, $non_xml),
98
+			array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
99
+			array(),
100
+			'-//W3C//DTD XHTML 1.0 Transitional//EN',
101
+			'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
102
+		);
103
+
104
+		$this->doctypes->register(
105
+			'XHTML 1.0 Strict', true,
106
+			array_merge($common, $xml, $non_xml),
107
+			array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
108
+			array(),
109
+			'-//W3C//DTD XHTML 1.0 Strict//EN',
110
+			'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
111
+		);
112
+
113
+		$this->doctypes->register(
114
+			'XHTML 1.1', true,
115
+			// Iframe is a real XHTML 1.1 module, despite being
116
+			// "transitional"!
117
+			array_merge($common, $xml, array('Ruby', 'Iframe')),
118
+			array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
119
+			array(),
120
+			'-//W3C//DTD XHTML 1.1//EN',
121
+			'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
122
+		);
123
+
124
+	}
125
+
126
+	/**
127
+	 * Registers a module to the recognized module list, useful for
128
+	 * overloading pre-existing modules.
129
+	 * @param $module Mixed: string module name, with or without
130
+	 *                HTMLPurifier_HTMLModule prefix, or instance of
131
+	 *                subclass of HTMLPurifier_HTMLModule.
132
+	 * @param $overload Boolean whether or not to overload previous modules.
133
+	 *                  If this is not set, and you do overload a module,
134
+	 *                  HTML Purifier will complain with a warning.
135
+	 * @note This function will not call autoload, you must instantiate
136
+	 *       (and thus invoke) autoload outside the method.
137
+	 * @note If a string is passed as a module name, different variants
138
+	 *       will be tested in this order:
139
+	 *          - Check for HTMLPurifier_HTMLModule_$name
140
+	 *          - Check all prefixes with $name in order they were added
141
+	 *          - Check for literal object name
142
+	 *          - Throw fatal error
143
+	 *       If your object name collides with an internal class, specify
144
+	 *       your module manually. All modules must have been included
145
+	 *       externally: registerModule will not perform inclusions for you!
146
+	 */
147
+	public function registerModule($module, $overload = false) {
148
+		if (is_string($module)) {
149
+			// attempt to load the module
150
+			$original_module = $module;
151
+			$ok = false;
152
+			foreach ($this->prefixes as $prefix) {
153
+				$module = $prefix . $original_module;
154
+				if (class_exists($module)) {
155
+					$ok = true;
156
+					break;
157
+				}
158
+			}
159
+			if (!$ok) {
160
+				$module = $original_module;
161
+				if (!class_exists($module)) {
162
+					trigger_error($original_module . ' module does not exist',
163
+						E_USER_ERROR);
164
+					return;
165
+				}
166
+			}
167
+			$module = new $module();
168
+		}
169
+		if (empty($module->name)) {
170
+			trigger_error('Module instance of ' . get_class($module) . ' must have name');
171
+			return;
172
+		}
173
+		if (!$overload && isset($this->registeredModules[$module->name])) {
174
+			trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
175
+		}
176
+		$this->registeredModules[$module->name] = $module;
177
+	}
178
+
179
+	/**
180
+	 * Adds a module to the current doctype by first registering it,
181
+	 * and then tacking it on to the active doctype
182
+	 */
183
+	public function addModule($module) {
184
+		$this->registerModule($module);
185
+		if (is_object($module)) $module = $module->name;
186
+		$this->userModules[] = $module;
187
+	}
188
+
189
+	/**
190
+	 * Adds a class prefix that registerModule() will use to resolve a
191
+	 * string name to a concrete class
192
+	 */
193
+	public function addPrefix($prefix) {
194
+		$this->prefixes[] = $prefix;
195
+	}
196
+
197
+	/**
198
+	 * Performs processing on modules, after being called you may
199
+	 * use getElement() and getElements()
200
+	 * @param $config Instance of HTMLPurifier_Config
201
+	 */
202
+	public function setup($config) {
203
+
204
+		$this->trusted = $config->get('HTML.Trusted');
205
+
206
+		// generate
207
+		$this->doctype = $this->doctypes->make($config);
208
+		$modules = $this->doctype->modules;
209
+
210
+		// take out the default modules that aren't allowed
211
+		$lookup = $config->get('HTML.AllowedModules');
212
+		$special_cases = $config->get('HTML.CoreModules');
213
+
214
+		if (is_array($lookup)) {
215
+			foreach ($modules as $k => $m) {
216
+				if (isset($special_cases[$m])) continue;
217
+				if (!isset($lookup[$m])) unset($modules[$k]);
218
+			}
219
+		}
220
+
221
+		// custom modules
222
+		if ($config->get('HTML.Proprietary')) {
223
+			$modules[] = 'Proprietary';
224
+		}
225
+		if ($config->get('HTML.SafeObject')) {
226
+			$modules[] = 'SafeObject';
227
+		}
228
+		if ($config->get('HTML.SafeEmbed')) {
229
+			$modules[] = 'SafeEmbed';
230
+		}
231
+		if ($config->get('HTML.Nofollow')) {
232
+			$modules[] = 'Nofollow';
233
+		}
234
+		if ($config->get('HTML.TargetBlank')) {
235
+			$modules[] = 'TargetBlank';
236
+		}
237
+
238
+		// merge in custom modules
239
+		$modules = array_merge($modules, $this->userModules);
240
+
241
+		foreach ($modules as $module) {
242
+			$this->processModule($module);
243
+			$this->modules[$module]->setup($config);
244
+		}
245
+
246
+		foreach ($this->doctype->tidyModules as $module) {
247
+			$this->processModule($module);
248
+			$this->modules[$module]->setup($config);
249
+		}
250
+
251
+		// prepare any injectors
252
+		foreach ($this->modules as $module) {
253
+			$n = array();
254
+			foreach ($module->info_injector as $i => $injector) {
255
+				if (!is_object($injector)) {
256
+					$class = "HTMLPurifier_Injector_$injector";
257
+					$injector = new $class;
258
+				}
259
+				$n[$injector->name] = $injector;
260
+			}
261
+			$module->info_injector = $n;
262
+		}
263
+
264
+		// setup lookup table based on all valid modules
265
+		foreach ($this->modules as $module) {
266
+			foreach ($module->info as $name => $def) {
267
+				if (!isset($this->elementLookup[$name])) {
268
+					$this->elementLookup[$name] = array();
269
+				}
270
+				$this->elementLookup[$name][] = $module->name;
271
+			}
272
+		}
273
+
274
+		// note the different choice
275
+		$this->contentSets = new HTMLPurifier_ContentSets(
276
+			// content set assembly deals with all possible modules,
277
+			// not just ones deemed to be "safe"
278
+			$this->modules
279
+		);
280
+		$this->attrCollections = new HTMLPurifier_AttrCollections(
281
+			$this->attrTypes,
282
+			// there is no way to directly disable a global attribute,
283
+			// but using AllowedAttributes or simply not including
284
+			// the module in your custom doctype should be sufficient
285
+			$this->modules
286
+		);
287
+	}
288
+
289
+	/**
290
+	 * Takes a module and adds it to the active module collection,
291
+	 * registering it if necessary.
292
+	 */
293
+	public function processModule($module) {
294
+		if (!isset($this->registeredModules[$module]) || is_object($module)) {
295
+			$this->registerModule($module);
296
+		}
297
+		$this->modules[$module] = $this->registeredModules[$module];
298
+	}
299
+
300
+	/**
301
+	 * Retrieves merged element definitions.
302
+	 * @return Array of HTMLPurifier_ElementDef
303
+	 */
304
+	public function getElements() {
305
+
306
+		$elements = array();
307
+		foreach ($this->modules as $module) {
308
+			if (!$this->trusted && !$module->safe) continue;
309
+			foreach ($module->info as $name => $v) {
310
+				if (isset($elements[$name])) continue;
311
+				$elements[$name] = $this->getElement($name);
312
+			}
313
+		}
314
+
315
+		// remove dud elements, this happens when an element that
316
+		// appeared to be safe actually wasn't
317
+		foreach ($elements as $n => $v) {
318
+			if ($v === false) unset($elements[$n]);
319
+		}
320
+
321
+		return $elements;
322
+
323
+	}
324
+
325
+	/**
326
+	 * Retrieves a single merged element definition
327
+	 * @param $name Name of element
328
+	 * @param $trusted Boolean trusted overriding parameter: set to true
329
+	 *                 if you want the full version of an element
330
+	 * @return Merged HTMLPurifier_ElementDef
331
+	 * @note You may notice that modules are getting iterated over twice (once
332
+	 *       in getElements() and once here). This
333
+	 *       is because
334
+	 */
335
+	public function getElement($name, $trusted = null) {
336
+
337
+		if (!isset($this->elementLookup[$name])) {
338
+			return false;
339
+		}
340
+
341
+		// setup global state variables
342
+		$def = false;
343
+		if ($trusted === null) $trusted = $this->trusted;
344
+
345
+		// iterate through each module that has registered itself to this
346
+		// element
347
+		foreach($this->elementLookup[$name] as $module_name) {
348
+
349
+			$module = $this->modules[$module_name];
350
+
351
+			// refuse to create/merge from a module that is deemed unsafe--
352
+			// pretend the module doesn't exist--when trusted mode is not on.
353
+			if (!$trusted && !$module->safe) {
354
+				continue;
355
+			}
356
+
357
+			// clone is used because, ideally speaking, the original
358
+			// definition should not be modified. Usually, this will
359
+			// make no difference, but for consistency's sake
360
+			$new_def = clone $module->info[$name];
361
+
362
+			if (!$def && $new_def->standalone) {
363
+				$def = $new_def;
364
+			} elseif ($def) {
365
+				// This will occur even if $new_def is standalone. In practice,
366
+				// this will usually result in a full replacement.
367
+				$def->mergeIn($new_def);
368
+			} else {
369
+				// :TODO:
370
+				// non-standalone definitions that don't have a standalone
371
+				// to merge into could be deferred to the end
372
+				// HOWEVER, it is perfectly valid for a non-standalone
373
+				// definition to lack a standalone definition, even
374
+				// after all processing: this allows us to safely
375
+				// specify extra attributes for elements that may not be
376
+				// enabled all in one place.  In particular, this might
377
+				// be the case for trusted elements.  WARNING: care must
378
+				// be taken that the /extra/ definitions are all safe.
379
+				continue;
380
+			}
381
+
382
+			// attribute value expansions
383
+			$this->attrCollections->performInclusions($def->attr);
384
+			$this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);
385
+
386
+			// descendants_are_inline, for ChildDef_Chameleon
387
+			if (is_string($def->content_model) &&
388
+				strpos($def->content_model, 'Inline') !== false) {
389
+				if ($name != 'del' && $name != 'ins') {
390
+					// this is for you, ins/del
391
+					$def->descendants_are_inline = true;
392
+				}
393
+			}
394
+
395
+			$this->contentSets->generateChildDef($def, $module);
396
+		}
397
+
398
+		// This can occur if there is a blank definition, but no base to
399
+		// mix it in with
400
+		if (!$def) return false;
401
+
402
+		// add information on required attributes
403
+		foreach ($def->attr as $attr_name => $attr_def) {
404
+			if ($attr_def->required) {
405
+				$def->required_attr[] = $attr_name;
406
+			}
407
+		}
408
+
409
+		return $def;
410
+
411
+	}
412 412
 
413 413
 }
414 414
 
Please login to merge, or discard this patch.
Braces   +24 added lines, -8 removed lines patch added patch discarded remove patch
@@ -182,7 +182,9 @@  discard block
 block discarded – undo
182 182
      */
183 183
     public function addModule($module) {
184 184
         $this->registerModule($module);
185
-        if (is_object($module)) $module = $module->name;
185
+        if (is_object($module)) {
186
+        	$module = $module->name;
187
+        }
186 188
         $this->userModules[] = $module;
187 189
     }
188 190
 
@@ -213,8 +215,12 @@  discard block
 block discarded – undo
213 215
 
214 216
         if (is_array($lookup)) {
215 217
             foreach ($modules as $k => $m) {
216
-                if (isset($special_cases[$m])) continue;
217
-                if (!isset($lookup[$m])) unset($modules[$k]);
218
+                if (isset($special_cases[$m])) {
219
+                	continue;
220
+                }
221
+                if (!isset($lookup[$m])) {
222
+                	unset($modules[$k]);
223
+                }
218 224
             }
219 225
         }
220 226
 
@@ -305,9 +311,13 @@  discard block
 block discarded – undo
305 311
 
306 312
         $elements = array();
307 313
         foreach ($this->modules as $module) {
308
-            if (!$this->trusted && !$module->safe) continue;
314
+            if (!$this->trusted && !$module->safe) {
315
+            	continue;
316
+            }
309 317
             foreach ($module->info as $name => $v) {
310
-                if (isset($elements[$name])) continue;
318
+                if (isset($elements[$name])) {
319
+                	continue;
320
+                }
311 321
                 $elements[$name] = $this->getElement($name);
312 322
             }
313 323
         }
@@ -315,7 +325,9 @@  discard block
 block discarded – undo
315 325
         // remove dud elements, this happens when an element that
316 326
         // appeared to be safe actually wasn't
317 327
         foreach ($elements as $n => $v) {
318
-            if ($v === false) unset($elements[$n]);
328
+            if ($v === false) {
329
+            	unset($elements[$n]);
330
+            }
319 331
         }
320 332
 
321 333
         return $elements;
@@ -340,7 +352,9 @@  discard block
 block discarded – undo
340 352
 
341 353
         // setup global state variables
342 354
         $def = false;
343
-        if ($trusted === null) $trusted = $this->trusted;
355
+        if ($trusted === null) {
356
+        	$trusted = $this->trusted;
357
+        }
344 358
 
345 359
         // iterate through each module that has registered itself to this
346 360
         // element
@@ -397,7 +411,9 @@  discard block
 block discarded – undo
397 411
 
398 412
         // This can occur if there is a blank definition, but no base to
399 413
         // mix it in with
400
-        if (!$def) return false;
414
+        if (!$def) {
415
+        	return false;
416
+        }
401 417
 
402 418
         // add information on required attributes
403 419
         foreach ($def->attr as $attr_name => $attr_def) {
Please login to merge, or discard this patch.
Spacing   +6 added lines, -6 removed lines patch added patch discarded remove patch
@@ -47,7 +47,7 @@  discard block
 block discarded – undo
47 47
     /** List of prefixes we should use for registering small names */
48 48
     public $prefixes = array('HTMLPurifier_HTMLModule_');
49 49
 
50
-    public $contentSets;     /**< Instance of HTMLPurifier_ContentSets */
50
+    public $contentSets; /**< Instance of HTMLPurifier_ContentSets */
51 51
     public $attrCollections; /**< Instance of HTMLPurifier_AttrCollections */
52 52
 
53 53
     /** If set to true, unsafe elements and attributes will be allowed */
@@ -150,7 +150,7 @@  discard block
 block discarded – undo
150 150
             $original_module = $module;
151 151
             $ok = false;
152 152
             foreach ($this->prefixes as $prefix) {
153
-                $module = $prefix . $original_module;
153
+                $module = $prefix.$original_module;
154 154
                 if (class_exists($module)) {
155 155
                     $ok = true;
156 156
                     break;
@@ -159,7 +159,7 @@  discard block
 block discarded – undo
159 159
             if (!$ok) {
160 160
                 $module = $original_module;
161 161
                 if (!class_exists($module)) {
162
-                    trigger_error($original_module . ' module does not exist',
162
+                    trigger_error($original_module.' module does not exist',
163 163
                         E_USER_ERROR);
164 164
                     return;
165 165
                 }
@@ -167,11 +167,11 @@  discard block
 block discarded – undo
167 167
             $module = new $module();
168 168
         }
169 169
         if (empty($module->name)) {
170
-            trigger_error('Module instance of ' . get_class($module) . ' must have name');
170
+            trigger_error('Module instance of '.get_class($module).' must have name');
171 171
             return;
172 172
         }
173 173
         if (!$overload && isset($this->registeredModules[$module->name])) {
174
-            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
174
+            trigger_error('Overloading '.$module->name.' without explicit overload parameter', E_USER_WARNING);
175 175
         }
176 176
         $this->registeredModules[$module->name] = $module;
177 177
     }
@@ -344,7 +344,7 @@  discard block
 block discarded – undo
344 344
 
345 345
         // iterate through each module that has registered itself to this
346 346
         // element
347
-        foreach($this->elementLookup[$name] as $module_name) {
347
+        foreach ($this->elementLookup[$name] as $module_name) {
348 348
 
349 349
             $module = $this->modules[$module_name];
350 350
 
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/IDAccumulator.php 3 patches
Doc Comments   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -19,7 +19,7 @@
 block discarded – undo
19 19
      * Builds an IDAccumulator, also initializing the default blacklist
20 20
      * @param $config Instance of HTMLPurifier_Config
21 21
      * @param $context Instance of HTMLPurifier_Context
22
-     * @return Fully initialized HTMLPurifier_IDAccumulator
22
+     * @return HTMLPurifier_IDAccumulator initialized HTMLPurifier_IDAccumulator
23 23
      */
24 24
     public static function build($config, $context) {
25 25
         $id_accumulator = new HTMLPurifier_IDAccumulator();
Please login to merge, or discard this patch.
Indentation   +35 added lines, -35 removed lines patch added patch discarded remove patch
@@ -9,44 +9,44 @@
 block discarded – undo
9 9
 class HTMLPurifier_IDAccumulator
10 10
 {
11 11
 
12
-    /**
13
-     * Lookup table of IDs we've accumulated.
14
-     * @public
15
-     */
16
-    public $ids = array();
12
+	/**
13
+	 * Lookup table of IDs we've accumulated.
14
+	 * @public
15
+	 */
16
+	public $ids = array();
17 17
 
18
-    /**
19
-     * Builds an IDAccumulator, also initializing the default blacklist
20
-     * @param $config Instance of HTMLPurifier_Config
21
-     * @param $context Instance of HTMLPurifier_Context
22
-     * @return Fully initialized HTMLPurifier_IDAccumulator
23
-     */
24
-    public static function build($config, $context) {
25
-        $id_accumulator = new HTMLPurifier_IDAccumulator();
26
-        $id_accumulator->load($config->get('Attr.IDBlacklist'));
27
-        return $id_accumulator;
28
-    }
18
+	/**
19
+	 * Builds an IDAccumulator, also initializing the default blacklist
20
+	 * @param $config Instance of HTMLPurifier_Config
21
+	 * @param $context Instance of HTMLPurifier_Context
22
+	 * @return Fully initialized HTMLPurifier_IDAccumulator
23
+	 */
24
+	public static function build($config, $context) {
25
+		$id_accumulator = new HTMLPurifier_IDAccumulator();
26
+		$id_accumulator->load($config->get('Attr.IDBlacklist'));
27
+		return $id_accumulator;
28
+	}
29 29
 
30
-    /**
31
-     * Add an ID to the lookup table.
32
-     * @param $id ID to be added.
33
-     * @return Bool status, true if success, false if there's a dupe
34
-     */
35
-    public function add($id) {
36
-        if (isset($this->ids[$id])) return false;
37
-        return $this->ids[$id] = true;
38
-    }
30
+	/**
31
+	 * Add an ID to the lookup table.
32
+	 * @param $id ID to be added.
33
+	 * @return Bool status, true if success, false if there's a dupe
34
+	 */
35
+	public function add($id) {
36
+		if (isset($this->ids[$id])) return false;
37
+		return $this->ids[$id] = true;
38
+	}
39 39
 
40
-    /**
41
-     * Load a list of IDs into the lookup table
42
-     * @param $array_of_ids Array of IDs to load
43
-     * @note This function doesn't care about duplicates
44
-     */
45
-    public function load($array_of_ids) {
46
-        foreach ($array_of_ids as $id) {
47
-            $this->ids[$id] = true;
48
-        }
49
-    }
40
+	/**
41
+	 * Load a list of IDs into the lookup table
42
+	 * @param $array_of_ids Array of IDs to load
43
+	 * @note This function doesn't care about duplicates
44
+	 */
45
+	public function load($array_of_ids) {
46
+		foreach ($array_of_ids as $id) {
47
+			$this->ids[$id] = true;
48
+		}
49
+	}
50 50
 
51 51
 }
52 52
 
Please login to merge, or discard this patch.
Braces   +3 added lines, -1 removed lines patch added patch discarded remove patch
@@ -33,7 +33,9 @@
 block discarded – undo
33 33
      * @return Bool status, true if success, false if there's a dupe
34 34
      */
35 35
     public function add($id) {
36
-        if (isset($this->ids[$id])) return false;
36
+        if (isset($this->ids[$id])) {
37
+        	return false;
38
+        }
37 39
         return $this->ids[$id] = true;
38 40
     }
39 41
 
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/Injector.php 4 patches
Doc Comments   +3 added lines, -2 removed lines patch added patch discarded remove patch
@@ -64,6 +64,7 @@  discard block
 block discarded – undo
64 64
      * result in infinite loops if not used carefully.
65 65
      * @warning HTML Purifier will prevent you from fast-forwarding with this
66 66
      *          function.
67
+     * @param integer $index
67 68
      */
68 69
     public function rewind($index) {
69 70
         $this->rewind = $index;
@@ -123,8 +124,8 @@  discard block
 block discarded – undo
123 124
 
124 125
     /**
125 126
      * Tests if the context node allows a certain element
126
-     * @param $name Name of element to test for
127
-     * @return True if element is allowed, false if it is not
127
+     * @param string $name Name of element to test for
128
+     * @return boolean if element is allowed, false if it is not
128 129
      */
129 130
     public function allowsElement($name) {
130 131
         if (!empty($this->currentNesting)) {
Please login to merge, or discard this patch.
Indentation   +216 added lines, -216 removed lines patch added patch discarded remove patch
@@ -16,222 +16,222 @@
 block discarded – undo
16 16
 abstract class HTMLPurifier_Injector
17 17
 {
18 18
 
19
-    /**
20
-     * Advisory name of injector, this is for friendly error messages
21
-     */
22
-    public $name;
23
-
24
-    /**
25
-     * Instance of HTMLPurifier_HTMLDefinition
26
-     */
27
-    protected $htmlDefinition;
28
-
29
-    /**
30
-     * Reference to CurrentNesting variable in Context. This is an array
31
-     * list of tokens that we are currently "inside"
32
-     */
33
-    protected $currentNesting;
34
-
35
-    /**
36
-     * Reference to InputTokens variable in Context. This is an array
37
-     * list of the input tokens that are being processed.
38
-     */
39
-    protected $inputTokens;
40
-
41
-    /**
42
-     * Reference to InputIndex variable in Context. This is an integer
43
-     * array index for $this->inputTokens that indicates what token
44
-     * is currently being processed.
45
-     */
46
-    protected $inputIndex;
47
-
48
-    /**
49
-     * Array of elements and attributes this injector creates and therefore
50
-     * need to be allowed by the definition. Takes form of
51
-     * array('element' => array('attr', 'attr2'), 'element2')
52
-     */
53
-    public $needed = array();
54
-
55
-    /**
56
-     * Index of inputTokens to rewind to.
57
-     */
58
-    protected $rewind = false;
59
-
60
-    /**
61
-     * Rewind to a spot to re-perform processing. This is useful if you
62
-     * deleted a node, and now need to see if this change affected any
63
-     * earlier nodes. Rewinding does not affect other injectors, and can
64
-     * result in infinite loops if not used carefully.
65
-     * @warning HTML Purifier will prevent you from fast-forwarding with this
66
-     *          function.
67
-     */
68
-    public function rewind($index) {
69
-        $this->rewind = $index;
70
-    }
71
-
72
-    /**
73
-     * Retrieves rewind, and then unsets it.
74
-     */
75
-    public function getRewind() {
76
-        $r = $this->rewind;
77
-        $this->rewind = false;
78
-        return $r;
79
-    }
80
-
81
-    /**
82
-     * Prepares the injector by giving it the config and context objects:
83
-     * this allows references to important variables to be made within
84
-     * the injector. This function also checks if the HTML environment
85
-     * will work with the Injector (see checkNeeded()).
86
-     * @param $config Instance of HTMLPurifier_Config
87
-     * @param $context Instance of HTMLPurifier_Context
88
-     * @return Boolean false if success, string of missing needed element/attribute if failure
89
-     */
90
-    public function prepare($config, $context) {
91
-        $this->htmlDefinition = $config->getHTMLDefinition();
92
-        // Even though this might fail, some unit tests ignore this and
93
-        // still test checkNeeded, so be careful. Maybe get rid of that
94
-        // dependency.
95
-        $result = $this->checkNeeded($config);
96
-        if ($result !== false) return $result;
97
-        $this->currentNesting =& $context->get('CurrentNesting');
98
-        $this->inputTokens    =& $context->get('InputTokens');
99
-        $this->inputIndex     =& $context->get('InputIndex');
100
-        return false;
101
-    }
102
-
103
-    /**
104
-     * This function checks if the HTML environment
105
-     * will work with the Injector: if p tags are not allowed, the
106
-     * Auto-Paragraphing injector should not be enabled.
107
-     * @param $config Instance of HTMLPurifier_Config
108
-     * @param $context Instance of HTMLPurifier_Context
109
-     * @return Boolean false if success, string of missing needed element/attribute if failure
110
-     */
111
-    public function checkNeeded($config) {
112
-        $def = $config->getHTMLDefinition();
113
-        foreach ($this->needed as $element => $attributes) {
114
-            if (is_int($element)) $element = $attributes;
115
-            if (!isset($def->info[$element])) return $element;
116
-            if (!is_array($attributes)) continue;
117
-            foreach ($attributes as $name) {
118
-                if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
119
-            }
120
-        }
121
-        return false;
122
-    }
123
-
124
-    /**
125
-     * Tests if the context node allows a certain element
126
-     * @param $name Name of element to test for
127
-     * @return True if element is allowed, false if it is not
128
-     */
129
-    public function allowsElement($name) {
130
-        if (!empty($this->currentNesting)) {
131
-            $parent_token = array_pop($this->currentNesting);
132
-            $this->currentNesting[] = $parent_token;
133
-            $parent = $this->htmlDefinition->info[$parent_token->name];
134
-        } else {
135
-            $parent = $this->htmlDefinition->info_parent_def;
136
-        }
137
-        if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
138
-            return false;
139
-        }
140
-        // check for exclusion
141
-        for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
142
-            $node = $this->currentNesting[$i];
143
-            $def  = $this->htmlDefinition->info[$node->name];
144
-            if (isset($def->excludes[$name])) return false;
145
-        }
146
-        return true;
147
-    }
148
-
149
-    /**
150
-     * Iterator function, which starts with the next token and continues until
151
-     * you reach the end of the input tokens.
152
-     * @warning Please prevent previous references from interfering with this
153
-     *          functions by setting $i = null beforehand!
154
-     * @param &$i Current integer index variable for inputTokens
155
-     * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
156
-     */
157
-    protected function forward(&$i, &$current) {
158
-        if ($i === null) $i = $this->inputIndex + 1;
159
-        else $i++;
160
-        if (!isset($this->inputTokens[$i])) return false;
161
-        $current = $this->inputTokens[$i];
162
-        return true;
163
-    }
164
-
165
-    /**
166
-     * Similar to _forward, but accepts a third parameter $nesting (which
167
-     * should be initialized at 0) and stops when we hit the end tag
168
-     * for the node $this->inputIndex starts in.
169
-     */
170
-    protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
171
-        $result = $this->forward($i, $current);
172
-        if (!$result) return false;
173
-        if ($nesting === null) $nesting = 0;
174
-        if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
175
-        elseif ($current instanceof HTMLPurifier_Token_End) {
176
-            if ($nesting <= 0) return false;
177
-            $nesting--;
178
-        }
179
-        return true;
180
-    }
181
-
182
-    /**
183
-     * Iterator function, starts with the previous token and continues until
184
-     * you reach the beginning of input tokens.
185
-     * @warning Please prevent previous references from interfering with this
186
-     *          functions by setting $i = null beforehand!
187
-     * @param &$i Current integer index variable for inputTokens
188
-     * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
189
-     */
190
-    protected function backward(&$i, &$current) {
191
-        if ($i === null) $i = $this->inputIndex - 1;
192
-        else $i--;
193
-        if ($i < 0) return false;
194
-        $current = $this->inputTokens[$i];
195
-        return true;
196
-    }
197
-
198
-    /**
199
-     * Initializes the iterator at the current position. Use in a do {} while;
200
-     * loop to force the _forward and _backward functions to start at the
201
-     * current location.
202
-     * @warning Please prevent previous references from interfering with this
203
-     *          functions by setting $i = null beforehand!
204
-     * @param &$i Current integer index variable for inputTokens
205
-     * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
206
-     */
207
-    protected function current(&$i, &$current) {
208
-        if ($i === null) $i = $this->inputIndex;
209
-        $current = $this->inputTokens[$i];
210
-    }
211
-
212
-    /**
213
-     * Handler that is called when a text token is processed
214
-     */
215
-    public function handleText(&$token) {}
216
-
217
-    /**
218
-     * Handler that is called when a start or empty token is processed
219
-     */
220
-    public function handleElement(&$token) {}
221
-
222
-    /**
223
-     * Handler that is called when an end token is processed
224
-     */
225
-    public function handleEnd(&$token) {
226
-        $this->notifyEnd($token);
227
-    }
228
-
229
-    /**
230
-     * Notifier that is called when an end token is processed
231
-     * @note This differs from handlers in that the token is read-only
232
-     * @deprecated
233
-     */
234
-    public function notifyEnd($token) {}
19
+	/**
20
+	 * Advisory name of injector, this is for friendly error messages
21
+	 */
22
+	public $name;
23
+
24
+	/**
25
+	 * Instance of HTMLPurifier_HTMLDefinition
26
+	 */
27
+	protected $htmlDefinition;
28
+
29
+	/**
30
+	 * Reference to CurrentNesting variable in Context. This is an array
31
+	 * list of tokens that we are currently "inside"
32
+	 */
33
+	protected $currentNesting;
34
+
35
+	/**
36
+	 * Reference to InputTokens variable in Context. This is an array
37
+	 * list of the input tokens that are being processed.
38
+	 */
39
+	protected $inputTokens;
40
+
41
+	/**
42
+	 * Reference to InputIndex variable in Context. This is an integer
43
+	 * array index for $this->inputTokens that indicates what token
44
+	 * is currently being processed.
45
+	 */
46
+	protected $inputIndex;
47
+
48
+	/**
49
+	 * Array of elements and attributes this injector creates and therefore
50
+	 * need to be allowed by the definition. Takes form of
51
+	 * array('element' => array('attr', 'attr2'), 'element2')
52
+	 */
53
+	public $needed = array();
54
+
55
+	/**
56
+	 * Index of inputTokens to rewind to.
57
+	 */
58
+	protected $rewind = false;
59
+
60
+	/**
61
+	 * Rewind to a spot to re-perform processing. This is useful if you
62
+	 * deleted a node, and now need to see if this change affected any
63
+	 * earlier nodes. Rewinding does not affect other injectors, and can
64
+	 * result in infinite loops if not used carefully.
65
+	 * @warning HTML Purifier will prevent you from fast-forwarding with this
66
+	 *          function.
67
+	 */
68
+	public function rewind($index) {
69
+		$this->rewind = $index;
70
+	}
71
+
72
+	/**
73
+	 * Retrieves rewind, and then unsets it.
74
+	 */
75
+	public function getRewind() {
76
+		$r = $this->rewind;
77
+		$this->rewind = false;
78
+		return $r;
79
+	}
80
+
81
+	/**
82
+	 * Prepares the injector by giving it the config and context objects:
83
+	 * this allows references to important variables to be made within
84
+	 * the injector. This function also checks if the HTML environment
85
+	 * will work with the Injector (see checkNeeded()).
86
+	 * @param $config Instance of HTMLPurifier_Config
87
+	 * @param $context Instance of HTMLPurifier_Context
88
+	 * @return Boolean false if success, string of missing needed element/attribute if failure
89
+	 */
90
+	public function prepare($config, $context) {
91
+		$this->htmlDefinition = $config->getHTMLDefinition();
92
+		// Even though this might fail, some unit tests ignore this and
93
+		// still test checkNeeded, so be careful. Maybe get rid of that
94
+		// dependency.
95
+		$result = $this->checkNeeded($config);
96
+		if ($result !== false) return $result;
97
+		$this->currentNesting =& $context->get('CurrentNesting');
98
+		$this->inputTokens    =& $context->get('InputTokens');
99
+		$this->inputIndex     =& $context->get('InputIndex');
100
+		return false;
101
+	}
102
+
103
+	/**
104
+	 * This function checks if the HTML environment
105
+	 * will work with the Injector: if p tags are not allowed, the
106
+	 * Auto-Paragraphing injector should not be enabled.
107
+	 * @param $config Instance of HTMLPurifier_Config
108
+	 * @param $context Instance of HTMLPurifier_Context
109
+	 * @return Boolean false if success, string of missing needed element/attribute if failure
110
+	 */
111
+	public function checkNeeded($config) {
112
+		$def = $config->getHTMLDefinition();
113
+		foreach ($this->needed as $element => $attributes) {
114
+			if (is_int($element)) $element = $attributes;
115
+			if (!isset($def->info[$element])) return $element;
116
+			if (!is_array($attributes)) continue;
117
+			foreach ($attributes as $name) {
118
+				if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
119
+			}
120
+		}
121
+		return false;
122
+	}
123
+
124
+	/**
125
+	 * Tests if the context node allows a certain element
126
+	 * @param $name Name of element to test for
127
+	 * @return True if element is allowed, false if it is not
128
+	 */
129
+	public function allowsElement($name) {
130
+		if (!empty($this->currentNesting)) {
131
+			$parent_token = array_pop($this->currentNesting);
132
+			$this->currentNesting[] = $parent_token;
133
+			$parent = $this->htmlDefinition->info[$parent_token->name];
134
+		} else {
135
+			$parent = $this->htmlDefinition->info_parent_def;
136
+		}
137
+		if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) {
138
+			return false;
139
+		}
140
+		// check for exclusion
141
+		for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
142
+			$node = $this->currentNesting[$i];
143
+			$def  = $this->htmlDefinition->info[$node->name];
144
+			if (isset($def->excludes[$name])) return false;
145
+		}
146
+		return true;
147
+	}
148
+
149
+	/**
150
+	 * Iterator function, which starts with the next token and continues until
151
+	 * you reach the end of the input tokens.
152
+	 * @warning Please prevent previous references from interfering with this
153
+	 *          functions by setting $i = null beforehand!
154
+	 * @param &$i Current integer index variable for inputTokens
155
+	 * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
156
+	 */
157
+	protected function forward(&$i, &$current) {
158
+		if ($i === null) $i = $this->inputIndex + 1;
159
+		else $i++;
160
+		if (!isset($this->inputTokens[$i])) return false;
161
+		$current = $this->inputTokens[$i];
162
+		return true;
163
+	}
164
+
165
+	/**
166
+	 * Similar to _forward, but accepts a third parameter $nesting (which
167
+	 * should be initialized at 0) and stops when we hit the end tag
168
+	 * for the node $this->inputIndex starts in.
169
+	 */
170
+	protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
171
+		$result = $this->forward($i, $current);
172
+		if (!$result) return false;
173
+		if ($nesting === null) $nesting = 0;
174
+		if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
175
+		elseif ($current instanceof HTMLPurifier_Token_End) {
176
+			if ($nesting <= 0) return false;
177
+			$nesting--;
178
+		}
179
+		return true;
180
+	}
181
+
182
+	/**
183
+	 * Iterator function, starts with the previous token and continues until
184
+	 * you reach the beginning of input tokens.
185
+	 * @warning Please prevent previous references from interfering with this
186
+	 *          functions by setting $i = null beforehand!
187
+	 * @param &$i Current integer index variable for inputTokens
188
+	 * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
189
+	 */
190
+	protected function backward(&$i, &$current) {
191
+		if ($i === null) $i = $this->inputIndex - 1;
192
+		else $i--;
193
+		if ($i < 0) return false;
194
+		$current = $this->inputTokens[$i];
195
+		return true;
196
+	}
197
+
198
+	/**
199
+	 * Initializes the iterator at the current position. Use in a do {} while;
200
+	 * loop to force the _forward and _backward functions to start at the
201
+	 * current location.
202
+	 * @warning Please prevent previous references from interfering with this
203
+	 *          functions by setting $i = null beforehand!
204
+	 * @param &$i Current integer index variable for inputTokens
205
+	 * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
206
+	 */
207
+	protected function current(&$i, &$current) {
208
+		if ($i === null) $i = $this->inputIndex;
209
+		$current = $this->inputTokens[$i];
210
+	}
211
+
212
+	/**
213
+	 * Handler that is called when a text token is processed
214
+	 */
215
+	public function handleText(&$token) {}
216
+
217
+	/**
218
+	 * Handler that is called when a start or empty token is processed
219
+	 */
220
+	public function handleElement(&$token) {}
221
+
222
+	/**
223
+	 * Handler that is called when an end token is processed
224
+	 */
225
+	public function handleEnd(&$token) {
226
+		$this->notifyEnd($token);
227
+	}
228
+
229
+	/**
230
+	 * Notifier that is called when an end token is processed
231
+	 * @note This differs from handlers in that the token is read-only
232
+	 * @deprecated
233
+	 */
234
+	public function notifyEnd($token) {}
235 235
 
236 236
 
237 237
 }
Please login to merge, or discard this patch.
Spacing   +4 added lines, -4 removed lines patch added patch discarded remove patch
@@ -94,9 +94,9 @@  discard block
 block discarded – undo
94 94
         // dependency.
95 95
         $result = $this->checkNeeded($config);
96 96
         if ($result !== false) return $result;
97
-        $this->currentNesting =& $context->get('CurrentNesting');
98
-        $this->inputTokens    =& $context->get('InputTokens');
99
-        $this->inputIndex     =& $context->get('InputIndex');
97
+        $this->currentNesting = & $context->get('CurrentNesting');
98
+        $this->inputTokens    = & $context->get('InputTokens');
99
+        $this->inputIndex     = & $context->get('InputIndex');
100 100
         return false;
101 101
     }
102 102
 
@@ -171,7 +171,7 @@  discard block
 block discarded – undo
171 171
         $result = $this->forward($i, $current);
172 172
         if (!$result) return false;
173 173
         if ($nesting === null) $nesting = 0;
174
-        if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
174
+        if ($current instanceof HTMLPurifier_Token_Start) $nesting++;
175 175
         elseif ($current instanceof HTMLPurifier_Token_End) {
176 176
             if ($nesting <= 0) return false;
177 177
             $nesting--;
Please login to merge, or discard this patch.
Braces   +49 added lines, -18 removed lines patch added patch discarded remove patch
@@ -93,7 +93,9 @@  discard block
 block discarded – undo
93 93
         // still test checkNeeded, so be careful. Maybe get rid of that
94 94
         // dependency.
95 95
         $result = $this->checkNeeded($config);
96
-        if ($result !== false) return $result;
96
+        if ($result !== false) {
97
+        	return $result;
98
+        }
97 99
         $this->currentNesting =& $context->get('CurrentNesting');
98 100
         $this->inputTokens    =& $context->get('InputTokens');
99 101
         $this->inputIndex     =& $context->get('InputIndex');
@@ -111,11 +113,19 @@  discard block
 block discarded – undo
111 113
     public function checkNeeded($config) {
112 114
         $def = $config->getHTMLDefinition();
113 115
         foreach ($this->needed as $element => $attributes) {
114
-            if (is_int($element)) $element = $attributes;
115
-            if (!isset($def->info[$element])) return $element;
116
-            if (!is_array($attributes)) continue;
116
+            if (is_int($element)) {
117
+            	$element = $attributes;
118
+            }
119
+            if (!isset($def->info[$element])) {
120
+            	return $element;
121
+            }
122
+            if (!is_array($attributes)) {
123
+            	continue;
124
+            }
117 125
             foreach ($attributes as $name) {
118
-                if (!isset($def->info[$element]->attr[$name])) return "$element.$name";
126
+                if (!isset($def->info[$element]->attr[$name])) {
127
+                	return "$element.$name";
128
+                }
119 129
             }
120 130
         }
121 131
         return false;
@@ -141,7 +151,9 @@  discard block
 block discarded – undo
141 151
         for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) {
142 152
             $node = $this->currentNesting[$i];
143 153
             $def  = $this->htmlDefinition->info[$node->name];
144
-            if (isset($def->excludes[$name])) return false;
154
+            if (isset($def->excludes[$name])) {
155
+            	return false;
156
+            }
145 157
         }
146 158
         return true;
147 159
     }
@@ -155,9 +167,14 @@  discard block
 block discarded – undo
155 167
      * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
156 168
      */
157 169
     protected function forward(&$i, &$current) {
158
-        if ($i === null) $i = $this->inputIndex + 1;
159
-        else $i++;
160
-        if (!isset($this->inputTokens[$i])) return false;
170
+        if ($i === null) {
171
+        	$i = $this->inputIndex + 1;
172
+        } else {
173
+        	$i++;
174
+        }
175
+        if (!isset($this->inputTokens[$i])) {
176
+        	return false;
177
+        }
161 178
         $current = $this->inputTokens[$i];
162 179
         return true;
163 180
     }
@@ -169,11 +186,18 @@  discard block
 block discarded – undo
169 186
      */
170 187
     protected function forwardUntilEndToken(&$i, &$current, &$nesting) {
171 188
         $result = $this->forward($i, $current);
172
-        if (!$result) return false;
173
-        if ($nesting === null) $nesting = 0;
174
-        if     ($current instanceof HTMLPurifier_Token_Start) $nesting++;
175
-        elseif ($current instanceof HTMLPurifier_Token_End) {
176
-            if ($nesting <= 0) return false;
189
+        if (!$result) {
190
+        	return false;
191
+        }
192
+        if ($nesting === null) {
193
+        	$nesting = 0;
194
+        }
195
+        if     ($current instanceof HTMLPurifier_Token_Start) {
196
+        	$nesting++;
197
+        } elseif ($current instanceof HTMLPurifier_Token_End) {
198
+            if ($nesting <= 0) {
199
+            	return false;
200
+            }
177 201
             $nesting--;
178 202
         }
179 203
         return true;
@@ -188,9 +212,14 @@  discard block
 block discarded – undo
188 212
      * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
189 213
      */
190 214
     protected function backward(&$i, &$current) {
191
-        if ($i === null) $i = $this->inputIndex - 1;
192
-        else $i--;
193
-        if ($i < 0) return false;
215
+        if ($i === null) {
216
+        	$i = $this->inputIndex - 1;
217
+        } else {
218
+        	$i--;
219
+        }
220
+        if ($i < 0) {
221
+        	return false;
222
+        }
194 223
         $current = $this->inputTokens[$i];
195 224
         return true;
196 225
     }
@@ -205,7 +234,9 @@  discard block
 block discarded – undo
205 234
      * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference
206 235
      */
207 236
     protected function current(&$i, &$current) {
208
-        if ($i === null) $i = $this->inputIndex;
237
+        if ($i === null) {
238
+        	$i = $this->inputIndex;
239
+        }
209 240
         $current = $this->inputTokens[$i];
210 241
     }
211 242
 
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/Language.php 4 patches
Doc Comments   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -66,7 +66,7 @@
 block discarded – undo
66 66
 
67 67
     /**
68 68
      * Retrieves a localised message.
69
-     * @param $key string identifier of message
69
+     * @param string $key string identifier of message
70 70
      * @return string localised message
71 71
      */
72 72
     public function getMessage($key) {
Please login to merge, or discard this patch.
Indentation   +150 added lines, -150 removed lines patch added patch discarded remove patch
@@ -7,156 +7,156 @@
 block discarded – undo
7 7
 class HTMLPurifier_Language
8 8
 {
9 9
 
10
-    /**
11
-     * ISO 639 language code of language. Prefers shortest possible version
12
-     */
13
-    public $code = 'en';
14
-
15
-    /**
16
-     * Fallback language code
17
-     */
18
-    public $fallback = false;
19
-
20
-    /**
21
-     * Array of localizable messages
22
-     */
23
-    public $messages = array();
24
-
25
-    /**
26
-     * Array of localizable error codes
27
-     */
28
-    public $errorNames = array();
29
-
30
-    /**
31
-     * True if no message file was found for this language, so English
32
-     * is being used instead. Check this if you'd like to notify the
33
-     * user that they've used a non-supported language.
34
-     */
35
-    public $error = false;
36
-
37
-    /**
38
-     * Has the language object been loaded yet?
39
-     * @todo Make it private, fix usage in HTMLPurifier_LanguageTest
40
-     */
41
-    public $_loaded = false;
42
-
43
-    /**
44
-     * Instances of HTMLPurifier_Config and HTMLPurifier_Context
45
-     */
46
-    protected $config, $context;
47
-
48
-    public function __construct($config, $context) {
49
-        $this->config  = $config;
50
-        $this->context = $context;
51
-    }
52
-
53
-    /**
54
-     * Loads language object with necessary info from factory cache
55
-     * @note This is a lazy loader
56
-     */
57
-    public function load() {
58
-        if ($this->_loaded) return;
59
-        $factory = HTMLPurifier_LanguageFactory::instance();
60
-        $factory->loadLanguage($this->code);
61
-        foreach ($factory->keys as $key) {
62
-            $this->$key = $factory->cache[$this->code][$key];
63
-        }
64
-        $this->_loaded = true;
65
-    }
66
-
67
-    /**
68
-     * Retrieves a localised message.
69
-     * @param $key string identifier of message
70
-     * @return string localised message
71
-     */
72
-    public function getMessage($key) {
73
-        if (!$this->_loaded) $this->load();
74
-        if (!isset($this->messages[$key])) return "[$key]";
75
-        return $this->messages[$key];
76
-    }
77
-
78
-    /**
79
-     * Retrieves a localised error name.
80
-     * @param $int integer error number, corresponding to PHP's error
81
-     *             reporting
82
-     * @return string localised message
83
-     */
84
-    public function getErrorName($int) {
85
-        if (!$this->_loaded) $this->load();
86
-        if (!isset($this->errorNames[$int])) return "[Error: $int]";
87
-        return $this->errorNames[$int];
88
-    }
89
-
90
-    /**
91
-     * Converts an array list into a string readable representation
92
-     */
93
-    public function listify($array) {
94
-        $sep      = $this->getMessage('Item separator');
95
-        $sep_last = $this->getMessage('Item separator last');
96
-        $ret = '';
97
-        for ($i = 0, $c = count($array); $i < $c; $i++) {
98
-            if ($i == 0) {
99
-            } elseif ($i + 1 < $c) {
100
-                $ret .= $sep;
101
-            } else {
102
-                $ret .= $sep_last;
103
-            }
104
-            $ret .= $array[$i];
105
-        }
106
-        return $ret;
107
-    }
108
-
109
-    /**
110
-     * Formats a localised message with passed parameters
111
-     * @param $key string identifier of message
112
-     * @param $args Parameters to substitute in
113
-     * @return string localised message
114
-     * @todo Implement conditionals? Right now, some messages make
115
-     *     reference to line numbers, but those aren't always available
116
-     */
117
-    public function formatMessage($key, $args = array()) {
118
-        if (!$this->_loaded) $this->load();
119
-        if (!isset($this->messages[$key])) return "[$key]";
120
-        $raw = $this->messages[$key];
121
-        $subst = array();
122
-        $generator = false;
123
-        foreach ($args as $i => $value) {
124
-            if (is_object($value)) {
125
-                if ($value instanceof HTMLPurifier_Token) {
126
-                    // factor this out some time
127
-                    if (!$generator) $generator = $this->context->get('Generator');
128
-                    if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
129
-                    if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
130
-                    $subst['$'.$i.'.Compact'] =
131
-                    $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
132
-                    // a more complex algorithm for compact representation
133
-                    // could be introduced for all types of tokens. This
134
-                    // may need to be factored out into a dedicated class
135
-                    if (!empty($value->attr)) {
136
-                        $stripped_token = clone $value;
137
-                        $stripped_token->attr = array();
138
-                        $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
139
-                    }
140
-                    $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
141
-                }
142
-                continue;
143
-            } elseif (is_array($value)) {
144
-                $keys = array_keys($value);
145
-                if (array_keys($keys) === $keys) {
146
-                    // list
147
-                    $subst['$'.$i] = $this->listify($value);
148
-                } else {
149
-                    // associative array
150
-                    // no $i implementation yet, sorry
151
-                    $subst['$'.$i.'.Keys'] = $this->listify($keys);
152
-                    $subst['$'.$i.'.Values'] = $this->listify(array_values($value));
153
-                }
154
-                continue;
155
-            }
156
-            $subst['$' . $i] = $value;
157
-        }
158
-        return strtr($raw, $subst);
159
-    }
10
+	/**
11
+	 * ISO 639 language code of language. Prefers shortest possible version
12
+	 */
13
+	public $code = 'en';
14
+
15
+	/**
16
+	 * Fallback language code
17
+	 */
18
+	public $fallback = false;
19
+
20
+	/**
21
+	 * Array of localizable messages
22
+	 */
23
+	public $messages = array();
24
+
25
+	/**
26
+	 * Array of localizable error codes
27
+	 */
28
+	public $errorNames = array();
29
+
30
+	/**
31
+	 * True if no message file was found for this language, so English
32
+	 * is being used instead. Check this if you'd like to notify the
33
+	 * user that they've used a non-supported language.
34
+	 */
35
+	public $error = false;
36
+
37
+	/**
38
+	 * Has the language object been loaded yet?
39
+	 * @todo Make it private, fix usage in HTMLPurifier_LanguageTest
40
+	 */
41
+	public $_loaded = false;
42
+
43
+	/**
44
+	 * Instances of HTMLPurifier_Config and HTMLPurifier_Context
45
+	 */
46
+	protected $config, $context;
47
+
48
+	public function __construct($config, $context) {
49
+		$this->config  = $config;
50
+		$this->context = $context;
51
+	}
52
+
53
+	/**
54
+	 * Loads language object with necessary info from factory cache
55
+	 * @note This is a lazy loader
56
+	 */
57
+	public function load() {
58
+		if ($this->_loaded) return;
59
+		$factory = HTMLPurifier_LanguageFactory::instance();
60
+		$factory->loadLanguage($this->code);
61
+		foreach ($factory->keys as $key) {
62
+			$this->$key = $factory->cache[$this->code][$key];
63
+		}
64
+		$this->_loaded = true;
65
+	}
66
+
67
+	/**
68
+	 * Retrieves a localised message.
69
+	 * @param $key string identifier of message
70
+	 * @return string localised message
71
+	 */
72
+	public function getMessage($key) {
73
+		if (!$this->_loaded) $this->load();
74
+		if (!isset($this->messages[$key])) return "[$key]";
75
+		return $this->messages[$key];
76
+	}
77
+
78
+	/**
79
+	 * Retrieves a localised error name.
80
+	 * @param $int integer error number, corresponding to PHP's error
81
+	 *             reporting
82
+	 * @return string localised message
83
+	 */
84
+	public function getErrorName($int) {
85
+		if (!$this->_loaded) $this->load();
86
+		if (!isset($this->errorNames[$int])) return "[Error: $int]";
87
+		return $this->errorNames[$int];
88
+	}
89
+
90
+	/**
91
+	 * Converts an array list into a string readable representation
92
+	 */
93
+	public function listify($array) {
94
+		$sep      = $this->getMessage('Item separator');
95
+		$sep_last = $this->getMessage('Item separator last');
96
+		$ret = '';
97
+		for ($i = 0, $c = count($array); $i < $c; $i++) {
98
+			if ($i == 0) {
99
+			} elseif ($i + 1 < $c) {
100
+				$ret .= $sep;
101
+			} else {
102
+				$ret .= $sep_last;
103
+			}
104
+			$ret .= $array[$i];
105
+		}
106
+		return $ret;
107
+	}
108
+
109
+	/**
110
+	 * Formats a localised message with passed parameters
111
+	 * @param $key string identifier of message
112
+	 * @param $args Parameters to substitute in
113
+	 * @return string localised message
114
+	 * @todo Implement conditionals? Right now, some messages make
115
+	 *     reference to line numbers, but those aren't always available
116
+	 */
117
+	public function formatMessage($key, $args = array()) {
118
+		if (!$this->_loaded) $this->load();
119
+		if (!isset($this->messages[$key])) return "[$key]";
120
+		$raw = $this->messages[$key];
121
+		$subst = array();
122
+		$generator = false;
123
+		foreach ($args as $i => $value) {
124
+			if (is_object($value)) {
125
+				if ($value instanceof HTMLPurifier_Token) {
126
+					// factor this out some time
127
+					if (!$generator) $generator = $this->context->get('Generator');
128
+					if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
129
+					if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
130
+					$subst['$'.$i.'.Compact'] =
131
+					$subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
132
+					// a more complex algorithm for compact representation
133
+					// could be introduced for all types of tokens. This
134
+					// may need to be factored out into a dedicated class
135
+					if (!empty($value->attr)) {
136
+						$stripped_token = clone $value;
137
+						$stripped_token->attr = array();
138
+						$subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token);
139
+					}
140
+					$subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown';
141
+				}
142
+				continue;
143
+			} elseif (is_array($value)) {
144
+				$keys = array_keys($value);
145
+				if (array_keys($keys) === $keys) {
146
+					// list
147
+					$subst['$'.$i] = $this->listify($value);
148
+				} else {
149
+					// associative array
150
+					// no $i implementation yet, sorry
151
+					$subst['$'.$i.'.Keys'] = $this->listify($keys);
152
+					$subst['$'.$i.'.Values'] = $this->listify(array_values($value));
153
+				}
154
+				continue;
155
+			}
156
+			$subst['$' . $i] = $value;
157
+		}
158
+		return strtr($raw, $subst);
159
+	}
160 160
 
161 161
 }
162 162
 
Please login to merge, or discard this patch.
Braces   +30 added lines, -10 removed lines patch added patch discarded remove patch
@@ -55,7 +55,9 @@  discard block
 block discarded – undo
55 55
      * @note This is a lazy loader
56 56
      */
57 57
     public function load() {
58
-        if ($this->_loaded) return;
58
+        if ($this->_loaded) {
59
+        	return;
60
+        }
59 61
         $factory = HTMLPurifier_LanguageFactory::instance();
60 62
         $factory->loadLanguage($this->code);
61 63
         foreach ($factory->keys as $key) {
@@ -70,8 +72,12 @@  discard block
 block discarded – undo
70 72
      * @return string localised message
71 73
      */
72 74
     public function getMessage($key) {
73
-        if (!$this->_loaded) $this->load();
74
-        if (!isset($this->messages[$key])) return "[$key]";
75
+        if (!$this->_loaded) {
76
+        	$this->load();
77
+        }
78
+        if (!isset($this->messages[$key])) {
79
+        	return "[$key]";
80
+        }
75 81
         return $this->messages[$key];
76 82
     }
77 83
 
@@ -82,8 +88,12 @@  discard block
 block discarded – undo
82 88
      * @return string localised message
83 89
      */
84 90
     public function getErrorName($int) {
85
-        if (!$this->_loaded) $this->load();
86
-        if (!isset($this->errorNames[$int])) return "[Error: $int]";
91
+        if (!$this->_loaded) {
92
+        	$this->load();
93
+        }
94
+        if (!isset($this->errorNames[$int])) {
95
+        	return "[Error: $int]";
96
+        }
87 97
         return $this->errorNames[$int];
88 98
     }
89 99
 
@@ -115,8 +125,12 @@  discard block
 block discarded – undo
115 125
      *     reference to line numbers, but those aren't always available
116 126
      */
117 127
     public function formatMessage($key, $args = array()) {
118
-        if (!$this->_loaded) $this->load();
119
-        if (!isset($this->messages[$key])) return "[$key]";
128
+        if (!$this->_loaded) {
129
+        	$this->load();
130
+        }
131
+        if (!isset($this->messages[$key])) {
132
+        	return "[$key]";
133
+        }
120 134
         $raw = $this->messages[$key];
121 135
         $subst = array();
122 136
         $generator = false;
@@ -124,9 +138,15 @@  discard block
 block discarded – undo
124 138
             if (is_object($value)) {
125 139
                 if ($value instanceof HTMLPurifier_Token) {
126 140
                     // factor this out some time
127
-                    if (!$generator) $generator = $this->context->get('Generator');
128
-                    if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name;
129
-                    if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data;
141
+                    if (!$generator) {
142
+                    	$generator = $this->context->get('Generator');
143
+                    }
144
+                    if (isset($value->name)) {
145
+                    	$subst['$'.$i.'.Name'] = $value->name;
146
+                    }
147
+                    if (isset($value->data)) {
148
+                    	$subst['$'.$i.'.Data'] = $value->data;
149
+                    }
130 150
                     $subst['$'.$i.'.Compact'] =
131 151
                     $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value);
132 152
                     // a more complex algorithm for compact representation
Please login to merge, or discard this patch.
Spacing   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -153,7 +153,7 @@
 block discarded – undo
153 153
                 }
154 154
                 continue;
155 155
             }
156
-            $subst['$' . $i] = $value;
156
+            $subst['$'.$i] = $value;
157 157
         }
158 158
         return strtr($raw, $subst);
159 159
     }
Please login to merge, or discard this patch.
classes/security/htmlpurifier/library/HTMLPurifier/Lexer/DirectLex.php 4 patches
Doc Comments   +4 added lines, -1 removed lines patch added patch discarded remove patch
@@ -319,6 +319,9 @@  discard block
 block discarded – undo
319 319
 
320 320
     /**
321 321
      * PHP 5.0.x compatible substr_count that implements offset and length
322
+     * @param string $needle
323
+     * @param integer $offset
324
+     * @param integer $length
322 325
      */
323 326
     protected function substrCount($haystack, $needle, $offset, $length) {
324 327
         static $oldVersion;
@@ -336,7 +339,7 @@  discard block
 block discarded – undo
336 339
     /**
337 340
      * Takes the inside of an HTML tag and makes an assoc array of attributes.
338 341
      *
339
-     * @param $string Inside of tag excluding name.
342
+     * @param string $string Inside of tag excluding name.
340 343
      * @returns Assoc array of attributes.
341 344
      */
342 345
     public function parseAttributeString($string, $config, $context) {
Please login to merge, or discard this patch.
Indentation   +471 added lines, -471 removed lines patch added patch discarded remove patch
@@ -13,477 +13,477 @@
 block discarded – undo
13 13
 class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
14 14
 {
15 15
 
16
-    public $tracksLineNumbers = true;
17
-
18
-    /**
19
-     * Whitespace characters for str(c)spn.
20
-     */
21
-    protected $_whitespace = "\x20\x09\x0D\x0A";
22
-
23
-    /**
24
-     * Callback function for script CDATA fudge
25
-     * @param $matches, in form of array(opening tag, contents, closing tag)
26
-     */
27
-    protected function scriptCallback($matches) {
28
-        return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false) . $matches[3];
29
-    }
30
-
31
-    public function tokenizeHTML($html, $config, $context) {
32
-
33
-        // special normalization for script tags without any armor
34
-        // our "armor" heurstic is a < sign any number of whitespaces after
35
-        // the first script tag
36
-        if ($config->get('HTML.Trusted')) {
37
-            $html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
38
-                array($this, 'scriptCallback'), $html);
39
-        }
40
-
41
-        $html = $this->normalize($html, $config, $context);
42
-
43
-        $cursor = 0; // our location in the text
44
-        $inside_tag = false; // whether or not we're parsing the inside of a tag
45
-        $array = array(); // result array
46
-
47
-        // This is also treated to mean maintain *column* numbers too
48
-        $maintain_line_numbers = $config->get('Core.MaintainLineNumbers');
49
-
50
-        if ($maintain_line_numbers === null) {
51
-            // automatically determine line numbering by checking
52
-            // if error collection is on
53
-            $maintain_line_numbers = $config->get('Core.CollectErrors');
54
-        }
55
-
56
-        if ($maintain_line_numbers) {
57
-            $current_line = 1;
58
-            $current_col  = 0;
59
-            $length = strlen($html);
60
-        } else {
61
-            $current_line = false;
62
-            $current_col  = false;
63
-            $length = false;
64
-        }
65
-        $context->register('CurrentLine', $current_line);
66
-        $context->register('CurrentCol',  $current_col);
67
-        $nl = "\n";
68
-        // how often to manually recalculate. This will ALWAYS be right,
69
-        // but it's pretty wasteful. Set to 0 to turn off
70
-        $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval');
71
-
72
-        $e = false;
73
-        if ($config->get('Core.CollectErrors')) {
74
-            $e =& $context->get('ErrorCollector');
75
-        }
76
-
77
-        // for testing synchronization
78
-        $loops = 0;
79
-
80
-        while(++$loops) {
81
-
82
-            // $cursor is either at the start of a token, or inside of
83
-            // a tag (i.e. there was a < immediately before it), as indicated
84
-            // by $inside_tag
85
-
86
-            if ($maintain_line_numbers) {
87
-
88
-                // $rcursor, however, is always at the start of a token.
89
-                $rcursor = $cursor - (int) $inside_tag;
90
-
91
-                // Column number is cheap, so we calculate it every round.
92
-                // We're interested at the *end* of the newline string, so
93
-                // we need to add strlen($nl) == 1 to $nl_pos before subtracting it
94
-                // from our "rcursor" position.
95
-                $nl_pos = strrpos($html, $nl, $rcursor - $length);
96
-                $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
97
-
98
-                // recalculate lines
99
-                if (
100
-                    $synchronize_interval &&  // synchronization is on
101
-                    $cursor > 0 &&            // cursor is further than zero
102
-                    $loops % $synchronize_interval === 0 // time to synchronize!
103
-                ) {
104
-                    $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
105
-                }
106
-
107
-            }
108
-
109
-            $position_next_lt = strpos($html, '<', $cursor);
110
-            $position_next_gt = strpos($html, '>', $cursor);
111
-
112
-            // triggers on "<b>asdf</b>" but not "asdf <b></b>"
113
-            // special case to set up context
114
-            if ($position_next_lt === $cursor) {
115
-                $inside_tag = true;
116
-                $cursor++;
117
-            }
118
-
119
-            if (!$inside_tag && $position_next_lt !== false) {
120
-                // We are not inside tag and there still is another tag to parse
121
-                $token = new
122
-                    HTMLPurifier_Token_Text(
123
-                        $this->parseData(
124
-                            substr(
125
-                                $html, $cursor, $position_next_lt - $cursor
126
-                            )
127
-                        )
128
-                    );
129
-                if ($maintain_line_numbers) {
130
-                    $token->rawPosition($current_line, $current_col);
131
-                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
132
-                }
133
-                $array[] = $token;
134
-                $cursor  = $position_next_lt + 1;
135
-                $inside_tag = true;
136
-                continue;
137
-            } elseif (!$inside_tag) {
138
-                // We are not inside tag but there are no more tags
139
-                // If we're already at the end, break
140
-                if ($cursor === strlen($html)) break;
141
-                // Create Text of rest of string
142
-                $token = new
143
-                    HTMLPurifier_Token_Text(
144
-                        $this->parseData(
145
-                            substr(
146
-                                $html, $cursor
147
-                            )
148
-                        )
149
-                    );
150
-                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
151
-                $array[] = $token;
152
-                break;
153
-            } elseif ($inside_tag && $position_next_gt !== false) {
154
-                // We are in tag and it is well formed
155
-                // Grab the internals of the tag
156
-                $strlen_segment = $position_next_gt - $cursor;
157
-
158
-                if ($strlen_segment < 1) {
159
-                    // there's nothing to process!
160
-                    $token = new HTMLPurifier_Token_Text('<');
161
-                    $cursor++;
162
-                    continue;
163
-                }
164
-
165
-                $segment = substr($html, $cursor, $strlen_segment);
166
-
167
-                if ($segment === false) {
168
-                    // somehow, we attempted to access beyond the end of
169
-                    // the string, defense-in-depth, reported by Nate Abele
170
-                    break;
171
-                }
172
-
173
-                // Check if it's a comment
174
-                if (
175
-                    substr($segment, 0, 3) === '!--'
176
-                ) {
177
-                    // re-determine segment length, looking for -->
178
-                    $position_comment_end = strpos($html, '-->', $cursor);
179
-                    if ($position_comment_end === false) {
180
-                        // uh oh, we have a comment that extends to
181
-                        // infinity. Can't be helped: set comment
182
-                        // end position to end of string
183
-                        if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
184
-                        $position_comment_end = strlen($html);
185
-                        $end = true;
186
-                    } else {
187
-                        $end = false;
188
-                    }
189
-                    $strlen_segment = $position_comment_end - $cursor;
190
-                    $segment = substr($html, $cursor, $strlen_segment);
191
-                    $token = new
192
-                        HTMLPurifier_Token_Comment(
193
-                            substr(
194
-                                $segment, 3, $strlen_segment - 3
195
-                            )
196
-                        );
197
-                    if ($maintain_line_numbers) {
198
-                        $token->rawPosition($current_line, $current_col);
199
-                        $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
200
-                    }
201
-                    $array[] = $token;
202
-                    $cursor = $end ? $position_comment_end : $position_comment_end + 3;
203
-                    $inside_tag = false;
204
-                    continue;
205
-                }
206
-
207
-                // Check if it's an end tag
208
-                $is_end_tag = (strpos($segment,'/') === 0);
209
-                if ($is_end_tag) {
210
-                    $type = substr($segment, 1);
211
-                    $token = new HTMLPurifier_Token_End($type);
212
-                    if ($maintain_line_numbers) {
213
-                        $token->rawPosition($current_line, $current_col);
214
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
215
-                    }
216
-                    $array[] = $token;
217
-                    $inside_tag = false;
218
-                    $cursor = $position_next_gt + 1;
219
-                    continue;
220
-                }
221
-
222
-                // Check leading character is alnum, if not, we may
223
-                // have accidently grabbed an emoticon. Translate into
224
-                // text and go our merry way
225
-                if (!ctype_alpha($segment[0])) {
226
-                    // XML:  $segment[0] !== '_' && $segment[0] !== ':'
227
-                    if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
228
-                    $token = new HTMLPurifier_Token_Text('<');
229
-                    if ($maintain_line_numbers) {
230
-                        $token->rawPosition($current_line, $current_col);
231
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
232
-                    }
233
-                    $array[] = $token;
234
-                    $inside_tag = false;
235
-                    continue;
236
-                }
237
-
238
-                // Check if it is explicitly self closing, if so, remove
239
-                // trailing slash. Remember, we could have a tag like <br>, so
240
-                // any later token processing scripts must convert improperly
241
-                // classified EmptyTags from StartTags.
242
-                $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
243
-                if ($is_self_closing) {
244
-                    $strlen_segment--;
245
-                    $segment = substr($segment, 0, $strlen_segment);
246
-                }
247
-
248
-                // Check if there are any attributes
249
-                $position_first_space = strcspn($segment, $this->_whitespace);
250
-
251
-                if ($position_first_space >= $strlen_segment) {
252
-                    if ($is_self_closing) {
253
-                        $token = new HTMLPurifier_Token_Empty($segment);
254
-                    } else {
255
-                        $token = new HTMLPurifier_Token_Start($segment);
256
-                    }
257
-                    if ($maintain_line_numbers) {
258
-                        $token->rawPosition($current_line, $current_col);
259
-                        $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
260
-                    }
261
-                    $array[] = $token;
262
-                    $inside_tag = false;
263
-                    $cursor = $position_next_gt + 1;
264
-                    continue;
265
-                }
266
-
267
-                // Grab out all the data
268
-                $type = substr($segment, 0, $position_first_space);
269
-                $attribute_string =
270
-                    trim(
271
-                        substr(
272
-                            $segment, $position_first_space
273
-                        )
274
-                    );
275
-                if ($attribute_string) {
276
-                    $attr = $this->parseAttributeString(
277
-                                    $attribute_string
278
-                                  , $config, $context
279
-                              );
280
-                } else {
281
-                    $attr = array();
282
-                }
283
-
284
-                if ($is_self_closing) {
285
-                    $token = new HTMLPurifier_Token_Empty($type, $attr);
286
-                } else {
287
-                    $token = new HTMLPurifier_Token_Start($type, $attr);
288
-                }
289
-                if ($maintain_line_numbers) {
290
-                    $token->rawPosition($current_line, $current_col);
291
-                    $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
292
-                }
293
-                $array[] = $token;
294
-                $cursor = $position_next_gt + 1;
295
-                $inside_tag = false;
296
-                continue;
297
-            } else {
298
-                // inside tag, but there's no ending > sign
299
-                if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
300
-                $token = new
301
-                    HTMLPurifier_Token_Text(
302
-                        '<' .
303
-                        $this->parseData(
304
-                            substr($html, $cursor)
305
-                        )
306
-                    );
307
-                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
308
-                // no cursor scroll? Hmm...
309
-                $array[] = $token;
310
-                break;
311
-            }
312
-            break;
313
-        }
314
-
315
-        $context->destroy('CurrentLine');
316
-        $context->destroy('CurrentCol');
317
-        return $array;
318
-    }
319
-
320
-    /**
321
-     * PHP 5.0.x compatible substr_count that implements offset and length
322
-     */
323
-    protected function substrCount($haystack, $needle, $offset, $length) {
324
-        static $oldVersion;
325
-        if ($oldVersion === null) {
326
-            $oldVersion = version_compare(PHP_VERSION, '5.1', '<');
327
-        }
328
-        if ($oldVersion) {
329
-            $haystack = substr($haystack, $offset, $length);
330
-            return substr_count($haystack, $needle);
331
-        } else {
332
-            return substr_count($haystack, $needle, $offset, $length);
333
-        }
334
-    }
335
-
336
-    /**
337
-     * Takes the inside of an HTML tag and makes an assoc array of attributes.
338
-     *
339
-     * @param $string Inside of tag excluding name.
340
-     * @returns Assoc array of attributes.
341
-     */
342
-    public function parseAttributeString($string, $config, $context) {
343
-        $string = (string) $string; // quick typecast
344
-
345
-        if ($string == '') return array(); // no attributes
346
-
347
-        $e = false;
348
-        if ($config->get('Core.CollectErrors')) {
349
-            $e =& $context->get('ErrorCollector');
350
-        }
351
-
352
-        // let's see if we can abort as quickly as possible
353
-        // one equal sign, no spaces => one attribute
354
-        $num_equal = substr_count($string, '=');
355
-        $has_space = strpos($string, ' ');
356
-        if ($num_equal === 0 && !$has_space) {
357
-            // bool attribute
358
-            return array($string => $string);
359
-        } elseif ($num_equal === 1 && !$has_space) {
360
-            // only one attribute
361
-            list($key, $quoted_value) = explode('=', $string);
362
-            $quoted_value = trim($quoted_value);
363
-            if (!$key) {
364
-                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
365
-                return array();
366
-            }
367
-            if (!$quoted_value) return array($key => '');
368
-            $first_char = @$quoted_value[0];
369
-            $last_char  = @$quoted_value[strlen($quoted_value)-1];
370
-
371
-            $same_quote = ($first_char == $last_char);
372
-            $open_quote = ($first_char == '"' || $first_char == "'");
373
-
374
-            if ( $same_quote && $open_quote) {
375
-                // well behaved
376
-                $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
377
-            } else {
378
-                // not well behaved
379
-                if ($open_quote) {
380
-                    if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
381
-                    $value = substr($quoted_value, 1);
382
-                } else {
383
-                    $value = $quoted_value;
384
-                }
385
-            }
386
-            if ($value === false) $value = '';
387
-            return array($key => $this->parseData($value));
388
-        }
389
-
390
-        // setup loop environment
391
-        $array  = array(); // return assoc array of attributes
392
-        $cursor = 0; // current position in string (moves forward)
393
-        $size   = strlen($string); // size of the string (stays the same)
394
-
395
-        // if we have unquoted attributes, the parser expects a terminating
396
-        // space, so let's guarantee that there's always a terminating space.
397
-        $string .= ' ';
398
-
399
-        while(true) {
400
-
401
-            if ($cursor >= $size) {
402
-                break;
403
-            }
404
-
405
-            $cursor += ($value = strspn($string, $this->_whitespace, $cursor));
406
-            // grab the key
407
-
408
-            $key_begin = $cursor; //we're currently at the start of the key
409
-
410
-            // scroll past all characters that are the key (not whitespace or =)
411
-            $cursor += strcspn($string, $this->_whitespace . '=', $cursor);
412
-
413
-            $key_end = $cursor; // now at the end of the key
414
-
415
-            $key = substr($string, $key_begin, $key_end - $key_begin);
416
-
417
-            if (!$key) {
418
-                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
419
-                $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
420
-                continue; // empty key
421
-            }
422
-
423
-            // scroll past all whitespace
424
-            $cursor += strspn($string, $this->_whitespace, $cursor);
425
-
426
-            if ($cursor >= $size) {
427
-                $array[$key] = $key;
428
-                break;
429
-            }
430
-
431
-            // if the next character is an equal sign, we've got a regular
432
-            // pair, otherwise, it's a bool attribute
433
-            $first_char = @$string[$cursor];
434
-
435
-            if ($first_char == '=') {
436
-                // key="value"
437
-
438
-                $cursor++;
439
-                $cursor += strspn($string, $this->_whitespace, $cursor);
440
-
441
-                if ($cursor === false) {
442
-                    $array[$key] = '';
443
-                    break;
444
-                }
445
-
446
-                // we might be in front of a quote right now
447
-
448
-                $char = @$string[$cursor];
449
-
450
-                if ($char == '"' || $char == "'") {
451
-                    // it's quoted, end bound is $char
452
-                    $cursor++;
453
-                    $value_begin = $cursor;
454
-                    $cursor = strpos($string, $char, $cursor);
455
-                    $value_end = $cursor;
456
-                } else {
457
-                    // it's not quoted, end bound is whitespace
458
-                    $value_begin = $cursor;
459
-                    $cursor += strcspn($string, $this->_whitespace, $cursor);
460
-                    $value_end = $cursor;
461
-                }
462
-
463
-                // we reached a premature end
464
-                if ($cursor === false) {
465
-                    $cursor = $size;
466
-                    $value_end = $cursor;
467
-                }
468
-
469
-                $value = substr($string, $value_begin, $value_end - $value_begin);
470
-                if ($value === false) $value = '';
471
-                $array[$key] = $this->parseData($value);
472
-                $cursor++;
473
-
474
-            } else {
475
-                // boolattr
476
-                if ($key !== '') {
477
-                    $array[$key] = $key;
478
-                } else {
479
-                    // purely theoretical
480
-                    if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
481
-                }
482
-
483
-            }
484
-        }
485
-        return $array;
486
-    }
16
+	public $tracksLineNumbers = true;
17
+
18
+	/**
19
+	 * Whitespace characters for str(c)spn.
20
+	 */
21
+	protected $_whitespace = "\x20\x09\x0D\x0A";
22
+
23
+	/**
24
+	 * Callback function for script CDATA fudge
25
+	 * @param $matches, in form of array(opening tag, contents, closing tag)
26
+	 */
27
+	protected function scriptCallback($matches) {
28
+		return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false) . $matches[3];
29
+	}
30
+
31
+	public function tokenizeHTML($html, $config, $context) {
32
+
33
+		// special normalization for script tags without any armor
34
+		// our "armor" heuristic is a < sign any number of whitespaces after
35
+		// the first script tag
36
+		if ($config->get('HTML.Trusted')) {
37
+			$html = preg_replace_callback('#(<script[^>]*>)(\s*[^<].+?)(</script>)#si',
38
+				array($this, 'scriptCallback'), $html);
39
+		}
40
+
41
+		$html = $this->normalize($html, $config, $context);
42
+
43
+		$cursor = 0; // our location in the text
44
+		$inside_tag = false; // whether or not we're parsing the inside of a tag
45
+		$array = array(); // result array
46
+
47
+		// This is also treated to mean maintain *column* numbers too
48
+		$maintain_line_numbers = $config->get('Core.MaintainLineNumbers');
49
+
50
+		if ($maintain_line_numbers === null) {
51
+			// automatically determine line numbering by checking
52
+			// if error collection is on
53
+			$maintain_line_numbers = $config->get('Core.CollectErrors');
54
+		}
55
+
56
+		if ($maintain_line_numbers) {
57
+			$current_line = 1;
58
+			$current_col  = 0;
59
+			$length = strlen($html);
60
+		} else {
61
+			$current_line = false;
62
+			$current_col  = false;
63
+			$length = false;
64
+		}
65
+		$context->register('CurrentLine', $current_line);
66
+		$context->register('CurrentCol',  $current_col);
67
+		$nl = "\n";
68
+		// how often to manually recalculate. This will ALWAYS be right,
69
+		// but it's pretty wasteful. Set to 0 to turn off
70
+		$synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval');
71
+
72
+		$e = false;
73
+		if ($config->get('Core.CollectErrors')) {
74
+			$e =& $context->get('ErrorCollector');
75
+		}
76
+
77
+		// for testing synchronization
78
+		$loops = 0;
79
+
80
+		while(++$loops) {
81
+
82
+			// $cursor is either at the start of a token, or inside of
83
+			// a tag (i.e. there was a < immediately before it), as indicated
84
+			// by $inside_tag
85
+
86
+			if ($maintain_line_numbers) {
87
+
88
+				// $rcursor, however, is always at the start of a token.
89
+				$rcursor = $cursor - (int) $inside_tag;
90
+
91
+				// Column number is cheap, so we calculate it every round.
92
+				// We're interested at the *end* of the newline string, so
93
+				// we need to add strlen($nl) == 1 to $nl_pos before subtracting it
94
+				// from our "rcursor" position.
95
+				$nl_pos = strrpos($html, $nl, $rcursor - $length);
96
+				$current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
97
+
98
+				// recalculate lines
99
+				if (
100
+					$synchronize_interval &&  // synchronization is on
101
+					$cursor > 0 &&            // cursor is further than zero
102
+					$loops % $synchronize_interval === 0 // time to synchronize!
103
+				) {
104
+					$current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
105
+				}
106
+
107
+			}
108
+
109
+			$position_next_lt = strpos($html, '<', $cursor);
110
+			$position_next_gt = strpos($html, '>', $cursor);
111
+
112
+			// triggers on "<b>asdf</b>" but not "asdf <b></b>"
113
+			// special case to set up context
114
+			if ($position_next_lt === $cursor) {
115
+				$inside_tag = true;
116
+				$cursor++;
117
+			}
118
+
119
+			if (!$inside_tag && $position_next_lt !== false) {
120
+				// We are not inside tag and there still is another tag to parse
121
+				$token = new
122
+					HTMLPurifier_Token_Text(
123
+						$this->parseData(
124
+							substr(
125
+								$html, $cursor, $position_next_lt - $cursor
126
+							)
127
+						)
128
+					);
129
+				if ($maintain_line_numbers) {
130
+					$token->rawPosition($current_line, $current_col);
131
+					$current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
132
+				}
133
+				$array[] = $token;
134
+				$cursor  = $position_next_lt + 1;
135
+				$inside_tag = true;
136
+				continue;
137
+			} elseif (!$inside_tag) {
138
+				// We are not inside tag but there are no more tags
139
+				// If we're already at the end, break
140
+				if ($cursor === strlen($html)) break;
141
+				// Create Text of rest of string
142
+				$token = new
143
+					HTMLPurifier_Token_Text(
144
+						$this->parseData(
145
+							substr(
146
+								$html, $cursor
147
+							)
148
+						)
149
+					);
150
+				if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
151
+				$array[] = $token;
152
+				break;
153
+			} elseif ($inside_tag && $position_next_gt !== false) {
154
+				// We are in tag and it is well formed
155
+				// Grab the internals of the tag
156
+				$strlen_segment = $position_next_gt - $cursor;
157
+
158
+				if ($strlen_segment < 1) {
159
+					// there's nothing to process!
160
+					$token = new HTMLPurifier_Token_Text('<');
161
+					$cursor++;
162
+					continue;
163
+				}
164
+
165
+				$segment = substr($html, $cursor, $strlen_segment);
166
+
167
+				if ($segment === false) {
168
+					// somehow, we attempted to access beyond the end of
169
+					// the string, defense-in-depth, reported by Nate Abele
170
+					break;
171
+				}
172
+
173
+				// Check if it's a comment
174
+				if (
175
+					substr($segment, 0, 3) === '!--'
176
+				) {
177
+					// re-determine segment length, looking for -->
178
+					$position_comment_end = strpos($html, '-->', $cursor);
179
+					if ($position_comment_end === false) {
180
+						// uh oh, we have a comment that extends to
181
+						// infinity. Can't be helped: set comment
182
+						// end position to end of string
183
+						if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
184
+						$position_comment_end = strlen($html);
185
+						$end = true;
186
+					} else {
187
+						$end = false;
188
+					}
189
+					$strlen_segment = $position_comment_end - $cursor;
190
+					$segment = substr($html, $cursor, $strlen_segment);
191
+					$token = new
192
+						HTMLPurifier_Token_Comment(
193
+							substr(
194
+								$segment, 3, $strlen_segment - 3
195
+							)
196
+						);
197
+					if ($maintain_line_numbers) {
198
+						$token->rawPosition($current_line, $current_col);
199
+						$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
200
+					}
201
+					$array[] = $token;
202
+					$cursor = $end ? $position_comment_end : $position_comment_end + 3;
203
+					$inside_tag = false;
204
+					continue;
205
+				}
206
+
207
+				// Check if it's an end tag
208
+				$is_end_tag = (strpos($segment,'/') === 0);
209
+				if ($is_end_tag) {
210
+					$type = substr($segment, 1);
211
+					$token = new HTMLPurifier_Token_End($type);
212
+					if ($maintain_line_numbers) {
213
+						$token->rawPosition($current_line, $current_col);
214
+						$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
215
+					}
216
+					$array[] = $token;
217
+					$inside_tag = false;
218
+					$cursor = $position_next_gt + 1;
219
+					continue;
220
+				}
221
+
222
+				// Check leading character is alnum, if not, we may
223
+				// have accidentally grabbed an emoticon. Translate into
224
+				// text and go our merry way
225
+				if (!ctype_alpha($segment[0])) {
226
+					// XML:  $segment[0] !== '_' && $segment[0] !== ':'
227
+					if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
228
+					$token = new HTMLPurifier_Token_Text('<');
229
+					if ($maintain_line_numbers) {
230
+						$token->rawPosition($current_line, $current_col);
231
+						$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
232
+					}
233
+					$array[] = $token;
234
+					$inside_tag = false;
235
+					continue;
236
+				}
237
+
238
+				// Check if it is explicitly self closing, if so, remove
239
+				// trailing slash. Remember, we could have a tag like <br>, so
240
+				// any later token processing scripts must convert improperly
241
+				// classified EmptyTags from StartTags.
242
+				$is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
243
+				if ($is_self_closing) {
244
+					$strlen_segment--;
245
+					$segment = substr($segment, 0, $strlen_segment);
246
+				}
247
+
248
+				// Check if there are any attributes
249
+				$position_first_space = strcspn($segment, $this->_whitespace);
250
+
251
+				if ($position_first_space >= $strlen_segment) {
252
+					if ($is_self_closing) {
253
+						$token = new HTMLPurifier_Token_Empty($segment);
254
+					} else {
255
+						$token = new HTMLPurifier_Token_Start($segment);
256
+					}
257
+					if ($maintain_line_numbers) {
258
+						$token->rawPosition($current_line, $current_col);
259
+						$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
260
+					}
261
+					$array[] = $token;
262
+					$inside_tag = false;
263
+					$cursor = $position_next_gt + 1;
264
+					continue;
265
+				}
266
+
267
+				// Grab out all the data
268
+				$type = substr($segment, 0, $position_first_space);
269
+				$attribute_string =
270
+					trim(
271
+						substr(
272
+							$segment, $position_first_space
273
+						)
274
+					);
275
+				if ($attribute_string) {
276
+					$attr = $this->parseAttributeString(
277
+									$attribute_string
278
+								  , $config, $context
279
+							  );
280
+				} else {
281
+					$attr = array();
282
+				}
283
+
284
+				if ($is_self_closing) {
285
+					$token = new HTMLPurifier_Token_Empty($type, $attr);
286
+				} else {
287
+					$token = new HTMLPurifier_Token_Start($type, $attr);
288
+				}
289
+				if ($maintain_line_numbers) {
290
+					$token->rawPosition($current_line, $current_col);
291
+					$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
292
+				}
293
+				$array[] = $token;
294
+				$cursor = $position_next_gt + 1;
295
+				$inside_tag = false;
296
+				continue;
297
+			} else {
298
+				// inside tag, but there's no ending > sign
299
+				if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
300
+				$token = new
301
+					HTMLPurifier_Token_Text(
302
+						'<' .
303
+						$this->parseData(
304
+							substr($html, $cursor)
305
+						)
306
+					);
307
+				if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
308
+				// no cursor scroll? Hmm...
309
+				$array[] = $token;
310
+				break;
311
+			}
312
+			break;
313
+		}
314
+
315
+		$context->destroy('CurrentLine');
316
+		$context->destroy('CurrentCol');
317
+		return $array;
318
+	}
319
+
320
+	/**
321
+	 * PHP 5.0.x compatible substr_count that implements offset and length
322
+	 */
323
+	protected function substrCount($haystack, $needle, $offset, $length) {
324
+		static $oldVersion;
325
+		if ($oldVersion === null) {
326
+			$oldVersion = version_compare(PHP_VERSION, '5.1', '<');
327
+		}
328
+		if ($oldVersion) {
329
+			$haystack = substr($haystack, $offset, $length);
330
+			return substr_count($haystack, $needle);
331
+		} else {
332
+			return substr_count($haystack, $needle, $offset, $length);
333
+		}
334
+	}
335
+
336
+	/**
337
+	 * Takes the inside of an HTML tag and makes an assoc array of attributes.
338
+	 *
339
+	 * @param $string Inside of tag excluding name.
340
+	 * @returns Assoc array of attributes.
341
+	 */
342
+	public function parseAttributeString($string, $config, $context) {
343
+		$string = (string) $string; // quick typecast
344
+
345
+		if ($string == '') return array(); // no attributes
346
+
347
+		$e = false;
348
+		if ($config->get('Core.CollectErrors')) {
349
+			$e =& $context->get('ErrorCollector');
350
+		}
351
+
352
+		// let's see if we can abort as quickly as possible
353
+		// one equal sign, no spaces => one attribute
354
+		$num_equal = substr_count($string, '=');
355
+		$has_space = strpos($string, ' ');
356
+		if ($num_equal === 0 && !$has_space) {
357
+			// bool attribute
358
+			return array($string => $string);
359
+		} elseif ($num_equal === 1 && !$has_space) {
360
+			// only one attribute
361
+			list($key, $quoted_value) = explode('=', $string);
362
+			$quoted_value = trim($quoted_value);
363
+			if (!$key) {
364
+				if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
365
+				return array();
366
+			}
367
+			if (!$quoted_value) return array($key => '');
368
+			$first_char = @$quoted_value[0];
369
+			$last_char  = @$quoted_value[strlen($quoted_value)-1];
370
+
371
+			$same_quote = ($first_char == $last_char);
372
+			$open_quote = ($first_char == '"' || $first_char == "'");
373
+
374
+			if ( $same_quote && $open_quote) {
375
+				// well behaved
376
+				$value = substr($quoted_value, 1, strlen($quoted_value) - 2);
377
+			} else {
378
+				// not well behaved
379
+				if ($open_quote) {
380
+					if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
381
+					$value = substr($quoted_value, 1);
382
+				} else {
383
+					$value = $quoted_value;
384
+				}
385
+			}
386
+			if ($value === false) $value = '';
387
+			return array($key => $this->parseData($value));
388
+		}
389
+
390
+		// setup loop environment
391
+		$array  = array(); // return assoc array of attributes
392
+		$cursor = 0; // current position in string (moves forward)
393
+		$size   = strlen($string); // size of the string (stays the same)
394
+
395
+		// if we have unquoted attributes, the parser expects a terminating
396
+		// space, so let's guarantee that there's always a terminating space.
397
+		$string .= ' ';
398
+
399
+		while(true) {
400
+
401
+			if ($cursor >= $size) {
402
+				break;
403
+			}
404
+
405
+			$cursor += ($value = strspn($string, $this->_whitespace, $cursor));
406
+			// grab the key
407
+
408
+			$key_begin = $cursor; //we're currently at the start of the key
409
+
410
+			// scroll past all characters that are the key (not whitespace or =)
411
+			$cursor += strcspn($string, $this->_whitespace . '=', $cursor);
412
+
413
+			$key_end = $cursor; // now at the end of the key
414
+
415
+			$key = substr($string, $key_begin, $key_end - $key_begin);
416
+
417
+			if (!$key) {
418
+				if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
419
+				$cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
420
+				continue; // empty key
421
+			}
422
+
423
+			// scroll past all whitespace
424
+			$cursor += strspn($string, $this->_whitespace, $cursor);
425
+
426
+			if ($cursor >= $size) {
427
+				$array[$key] = $key;
428
+				break;
429
+			}
430
+
431
+			// if the next character is an equal sign, we've got a regular
432
+			// pair, otherwise, it's a bool attribute
433
+			$first_char = @$string[$cursor];
434
+
435
+			if ($first_char == '=') {
436
+				// key="value"
437
+
438
+				$cursor++;
439
+				$cursor += strspn($string, $this->_whitespace, $cursor);
440
+
441
+				if ($cursor === false) {
442
+					$array[$key] = '';
443
+					break;
444
+				}
445
+
446
+				// we might be in front of a quote right now
447
+
448
+				$char = @$string[$cursor];
449
+
450
+				if ($char == '"' || $char == "'") {
451
+					// it's quoted, end bound is $char
452
+					$cursor++;
453
+					$value_begin = $cursor;
454
+					$cursor = strpos($string, $char, $cursor);
455
+					$value_end = $cursor;
456
+				} else {
457
+					// it's not quoted, end bound is whitespace
458
+					$value_begin = $cursor;
459
+					$cursor += strcspn($string, $this->_whitespace, $cursor);
460
+					$value_end = $cursor;
461
+				}
462
+
463
+				// we reached a premature end
464
+				if ($cursor === false) {
465
+					$cursor = $size;
466
+					$value_end = $cursor;
467
+				}
468
+
469
+				$value = substr($string, $value_begin, $value_end - $value_begin);
470
+				if ($value === false) $value = '';
471
+				$array[$key] = $this->parseData($value);
472
+				$cursor++;
473
+
474
+			} else {
475
+				// boolattr
476
+				if ($key !== '') {
477
+					$array[$key] = $key;
478
+				} else {
479
+					// purely theoretical
480
+					if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
481
+				}
482
+
483
+			}
484
+		}
485
+		return $array;
486
+	}
487 487
 
488 488
 }
489 489
 
Please login to merge, or discard this patch.
Braces   +43 added lines, -14 removed lines patch added patch discarded remove patch
@@ -137,7 +137,9 @@  discard block
 block discarded – undo
137 137
             } elseif (!$inside_tag) {
138 138
                 // We are not inside tag but there are no more tags
139 139
                 // If we're already at the end, break
140
-                if ($cursor === strlen($html)) break;
140
+                if ($cursor === strlen($html)) {
141
+                	break;
142
+                }
141 143
                 // Create Text of rest of string
142 144
                 $token = new
143 145
                     HTMLPurifier_Token_Text(
@@ -147,7 +149,9 @@  discard block
 block discarded – undo
147 149
                             )
148 150
                         )
149 151
                     );
150
-                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
152
+                if ($maintain_line_numbers) {
153
+                	$token->rawPosition($current_line, $current_col);
154
+                }
151 155
                 $array[] = $token;
152 156
                 break;
153 157
             } elseif ($inside_tag && $position_next_gt !== false) {
@@ -180,7 +184,9 @@  discard block
 block discarded – undo
180 184
                         // uh oh, we have a comment that extends to
181 185
                         // infinity. Can't be helped: set comment
182 186
                         // end position to end of string
183
-                        if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment');
187
+                        if ($e) {
188
+                        	$e->send(E_WARNING, 'Lexer: Unclosed comment');
189
+                        }
184 190
                         $position_comment_end = strlen($html);
185 191
                         $end = true;
186 192
                     } else {
@@ -224,7 +230,9 @@  discard block
 block discarded – undo
224 230
                 // text and go our merry way
225 231
                 if (!ctype_alpha($segment[0])) {
226 232
                     // XML:  $segment[0] !== '_' && $segment[0] !== ':'
227
-                    if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
233
+                    if ($e) {
234
+                    	$e->send(E_NOTICE, 'Lexer: Unescaped lt');
235
+                    }
228 236
                     $token = new HTMLPurifier_Token_Text('<');
229 237
                     if ($maintain_line_numbers) {
230 238
                         $token->rawPosition($current_line, $current_col);
@@ -296,7 +304,9 @@  discard block
 block discarded – undo
296 304
                 continue;
297 305
             } else {
298 306
                 // inside tag, but there's no ending > sign
299
-                if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
307
+                if ($e) {
308
+                	$e->send(E_WARNING, 'Lexer: Missing gt');
309
+                }
300 310
                 $token = new
301 311
                     HTMLPurifier_Token_Text(
302 312
                         '<' .
@@ -304,7 +314,9 @@  discard block
 block discarded – undo
304 314
                             substr($html, $cursor)
305 315
                         )
306 316
                     );
307
-                if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
317
+                if ($maintain_line_numbers) {
318
+                	$token->rawPosition($current_line, $current_col);
319
+                }
308 320
                 // no cursor scroll? Hmm...
309 321
                 $array[] = $token;
310 322
                 break;
@@ -342,7 +354,10 @@  discard block
 block discarded – undo
342 354
     public function parseAttributeString($string, $config, $context) {
343 355
         $string = (string) $string; // quick typecast
344 356
 
345
-        if ($string == '') return array(); // no attributes
357
+        if ($string == '') {
358
+        	return array();
359
+        }
360
+        // no attributes
346 361
 
347 362
         $e = false;
348 363
         if ($config->get('Core.CollectErrors')) {
@@ -361,10 +376,14 @@  discard block
 block discarded – undo
361 376
             list($key, $quoted_value) = explode('=', $string);
362 377
             $quoted_value = trim($quoted_value);
363 378
             if (!$key) {
364
-                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
379
+                if ($e) {
380
+                	$e->send(E_ERROR, 'Lexer: Missing attribute key');
381
+                }
365 382
                 return array();
366 383
             }
367
-            if (!$quoted_value) return array($key => '');
384
+            if (!$quoted_value) {
385
+            	return array($key => '');
386
+            }
368 387
             $first_char = @$quoted_value[0];
369 388
             $last_char  = @$quoted_value[strlen($quoted_value)-1];
370 389
 
@@ -377,13 +396,17 @@  discard block
 block discarded – undo
377 396
             } else {
378 397
                 // not well behaved
379 398
                 if ($open_quote) {
380
-                    if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote');
399
+                    if ($e) {
400
+                    	$e->send(E_ERROR, 'Lexer: Missing end quote');
401
+                    }
381 402
                     $value = substr($quoted_value, 1);
382 403
                 } else {
383 404
                     $value = $quoted_value;
384 405
                 }
385 406
             }
386
-            if ($value === false) $value = '';
407
+            if ($value === false) {
408
+            	$value = '';
409
+            }
387 410
             return array($key => $this->parseData($value));
388 411
         }
389 412
 
@@ -415,7 +438,9 @@  discard block
 block discarded – undo
415 438
             $key = substr($string, $key_begin, $key_end - $key_begin);
416 439
 
417 440
             if (!$key) {
418
-                if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
441
+                if ($e) {
442
+                	$e->send(E_ERROR, 'Lexer: Missing attribute key');
443
+                }
419 444
                 $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop
420 445
                 continue; // empty key
421 446
             }
@@ -467,7 +492,9 @@  discard block
 block discarded – undo
467 492
                 }
468 493
 
469 494
                 $value = substr($string, $value_begin, $value_end - $value_begin);
470
-                if ($value === false) $value = '';
495
+                if ($value === false) {
496
+                	$value = '';
497
+                }
471 498
                 $array[$key] = $this->parseData($value);
472 499
                 $cursor++;
473 500
 
@@ -477,7 +504,9 @@  discard block
 block discarded – undo
477 504
                     $array[$key] = $key;
478 505
                 } else {
479 506
                     // purely theoretical
480
-                    if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key');
507
+                    if ($e) {
508
+                    	$e->send(E_ERROR, 'Lexer: Missing attribute key');
509
+                    }
481 510
                 }
482 511
 
483 512
             }
Please login to merge, or discard this patch.
Spacing   +14 added lines, -14 removed lines patch added patch discarded remove patch
@@ -25,7 +25,7 @@  discard block
 block discarded – undo
25 25
      * @param $matches, in form of array(opening tag, contents, closing tag)
26 26
      */
27 27
     protected function scriptCallback($matches) {
28
-        return $matches[1] . htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false) . $matches[3];
28
+        return $matches[1].htmlspecialchars($matches[2], ENT_COMPAT, 'UTF-8', false).$matches[3];
29 29
     }
30 30
 
31 31
     public function tokenizeHTML($html, $config, $context) {
@@ -63,7 +63,7 @@  discard block
 block discarded – undo
63 63
             $length = false;
64 64
         }
65 65
         $context->register('CurrentLine', $current_line);
66
-        $context->register('CurrentCol',  $current_col);
66
+        $context->register('CurrentCol', $current_col);
67 67
         $nl = "\n";
68 68
         // how often to manually recalculate. This will ALWAYS be right,
69 69
         // but it's pretty wasteful. Set to 0 to turn off
@@ -71,13 +71,13 @@  discard block
 block discarded – undo
71 71
 
72 72
         $e = false;
73 73
         if ($config->get('Core.CollectErrors')) {
74
-            $e =& $context->get('ErrorCollector');
74
+            $e = & $context->get('ErrorCollector');
75 75
         }
76 76
 
77 77
         // for testing synchronization
78 78
         $loops = 0;
79 79
 
80
-        while(++$loops) {
80
+        while (++$loops) {
81 81
 
82 82
             // $cursor is either at the start of a token, or inside of
83 83
             // a tag (i.e. there was a < immediately before it), as indicated
@@ -97,8 +97,8 @@  discard block
 block discarded – undo
97 97
 
98 98
                 // recalculate lines
99 99
                 if (
100
-                    $synchronize_interval &&  // synchronization is on
101
-                    $cursor > 0 &&            // cursor is further than zero
100
+                    $synchronize_interval && // synchronization is on
101
+                    $cursor > 0 && // cursor is further than zero
102 102
                     $loops % $synchronize_interval === 0 // time to synchronize!
103 103
                 ) {
104 104
                     $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
@@ -205,7 +205,7 @@  discard block
 block discarded – undo
205 205
                 }
206 206
 
207 207
                 // Check if it's an end tag
208
-                $is_end_tag = (strpos($segment,'/') === 0);
208
+                $is_end_tag = (strpos($segment, '/') === 0);
209 209
                 if ($is_end_tag) {
210 210
                     $type = substr($segment, 1);
211 211
                     $token = new HTMLPurifier_Token_End($type);
@@ -239,7 +239,7 @@  discard block
 block discarded – undo
239 239
                 // trailing slash. Remember, we could have a tag like <br>, so
240 240
                 // any later token processing scripts must convert improperly
241 241
                 // classified EmptyTags from StartTags.
242
-                $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1);
242
+                $is_self_closing = (strrpos($segment, '/') === $strlen_segment - 1);
243 243
                 if ($is_self_closing) {
244 244
                     $strlen_segment--;
245 245
                     $segment = substr($segment, 0, $strlen_segment);
@@ -299,7 +299,7 @@  discard block
 block discarded – undo
299 299
                 if ($e) $e->send(E_WARNING, 'Lexer: Missing gt');
300 300
                 $token = new
301 301
                     HTMLPurifier_Token_Text(
302
-                        '<' .
302
+                        '<'.
303 303
                         $this->parseData(
304 304
                             substr($html, $cursor)
305 305
                         )
@@ -346,7 +346,7 @@  discard block
 block discarded – undo
346 346
 
347 347
         $e = false;
348 348
         if ($config->get('Core.CollectErrors')) {
349
-            $e =& $context->get('ErrorCollector');
349
+            $e = & $context->get('ErrorCollector');
350 350
         }
351 351
 
352 352
         // let's see if we can abort as quickly as possible
@@ -366,12 +366,12 @@  discard block
 block discarded – undo
366 366
             }
367 367
             if (!$quoted_value) return array($key => '');
368 368
             $first_char = @$quoted_value[0];
369
-            $last_char  = @$quoted_value[strlen($quoted_value)-1];
369
+            $last_char  = @$quoted_value[strlen($quoted_value) - 1];
370 370
 
371 371
             $same_quote = ($first_char == $last_char);
372 372
             $open_quote = ($first_char == '"' || $first_char == "'");
373 373
 
374
-            if ( $same_quote && $open_quote) {
374
+            if ($same_quote && $open_quote) {
375 375
                 // well behaved
376 376
                 $value = substr($quoted_value, 1, strlen($quoted_value) - 2);
377 377
             } else {
@@ -396,7 +396,7 @@  discard block
 block discarded – undo
396 396
         // space, so let's guarantee that there's always a terminating space.
397 397
         $string .= ' ';
398 398
 
399
-        while(true) {
399
+        while (true) {
400 400
 
401 401
             if ($cursor >= $size) {
402 402
                 break;
@@ -408,7 +408,7 @@  discard block
 block discarded – undo
408 408
             $key_begin = $cursor; //we're currently at the start of the key
409 409
 
410 410
             // scroll past all characters that are the key (not whitespace or =)
411
-            $cursor += strcspn($string, $this->_whitespace . '=', $cursor);
411
+            $cursor += strcspn($string, $this->_whitespace.'=', $cursor);
412 412
 
413 413
             $key_end = $cursor; // now at the end of the key
414 414
 
Please login to merge, or discard this patch.