| @@ 434-443 (lines=10) @@ | ||
| 431 | case 3: |

| 432 | $cp = (($q[0] ^ 0xE0) << 12) | (($q[1] ^ 0x80) << 6) | ($q[2] ^ 0x80); |

| 433 | // Overlong sequence: a three-byte form must encode at least U+0800 |

| 434 | if ($cp < 0x800) {


| 435 | $out[] = 0xFFFD; |

| 436 | } |

| 437 | // Check for UTF-8 encoded surrogates, U+D800..U+DFFF inclusive (caused by a bad UTF-8 encoder) |

| 438 | else if ($cp >= 0xD800 && $cp <= 0xDFFF) {


| 439 | $out[] = 0xFFFD; |

| 440 | } |

| 441 | else {


| 442 | $out[] = $cp; |

| 443 | } |

| 444 | continue; |
|
| 445 | ||
| 446 | case 4: |
|
| @@ 449-458 (lines=10) @@ | ||
| 446 | case 4: |

| 447 | $cp = (($q[0] ^ 0xF0) << 18) | (($q[1] ^ 0x80) << 12) | (($q[2] ^ 0x80) << 6) | ($q[3] ^ 0x80); |

| 448 | // Overlong sequence: a four-byte form must encode at least U+10000 |

| 449 | if ($cp < 0x10000) {


| 450 | $out[] = 0xFFFD; |

| 451 | } |

| 452 | // Outside of the Unicode range (U+10FFFF itself is the last valid code point) |

| 453 | else if ($cp > 0x10FFFF) {


| 454 | $out[] = 0xFFFD; |

| 455 | } |

| 456 | else {


| 457 | $out[] = $cp; |

| 458 | } |

| 459 | continue; |
|
| 460 | } |
|
| 461 | } |
|