| @@ 3430-3436 (lines=7) @@ | ||
| 3427 | if (0 == (0x80 & $in)) { |
|
| 3428 | // US-ASCII, pass straight through. |
|
| 3429 | $mBytes = 1; |
|
| 3430 | } elseif (0xC0 == (0xE0 & $in)) { |
|
| 3431 | // First octet of 2 octet sequence. |
|
| 3432 | $mUcs4 = $in; |
|
| 3433 | $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
| 3434 | $mState = 1; |
|
| 3435 | $mBytes = 2; |
|
| 3436 | } elseif (0xE0 == (0xF0 & $in)) { |
|
| 3437 | // First octet of 3 octet sequence. |
|
| 3438 | $mUcs4 = $in; |
|
| 3439 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| @@ 3442-3448 (lines=7) @@ | ||
| 3439 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| 3440 | $mState = 2; |
|
| 3441 | $mBytes = 3; |
|
| 3442 | } elseif (0xF0 == (0xF8 & $in)) { |
|
| 3443 | // First octet of 4 octet sequence. |
|
| 3444 | $mUcs4 = $in; |
|
| 3445 | $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
| 3446 | $mState = 3; |
|
| 3447 | $mBytes = 4; |
|
| 3448 | } elseif (0xF8 == (0xFC & $in)) { |
|
| 3449 | /* First octet of 5 octet sequence. |
|
| 3450 | * |
|
| 3451 | * This is illegal because the encoded codepoint must be either |
|
| @@ 3461-3467 (lines=7) @@ | ||
| 3458 | $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
| 3459 | $mState = 4; |
|
| 3460 | $mBytes = 5; |
|
| 3461 | } elseif (0xFC == (0xFE & $in)) { |
|
| 3462 | // First octet of 6 octet sequence, see comments for 5 octet sequence. |
|
| 3463 | $mUcs4 = $in; |
|
| 3464 | $mUcs4 = ($mUcs4 & 1) << 30; |
|
| 3465 | $mState = 5; |
|
| 3466 | $mBytes = 6; |
|
| 3467 | } else { |
|
| 3468 | /* Current octet is neither in the US-ASCII range nor a legal first |
|
| 3469 | * octet of a multi-octet sequence. |
|
| 3470 | */ |
|