| @@ 3685-3691 (lines=7) @@ | ||
| 3682 | if (0 === (0x80 & $in)) { |
|
| 3683 | // US-ASCII, pass straight through. |
|
| 3684 | $mBytes = 1; |
|
| 3685 | } elseif (0xC0 === (0xE0 & $in)) { |
|
| 3686 | // First octet of 2 octet sequence. |
|
| 3687 | $mUcs4 = $in; |
|
| 3688 | $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
| 3689 | $mState = 1; |
|
| 3690 | $mBytes = 2; |
|
| 3691 | } elseif (0xE0 === (0xF0 & $in)) { |
|
| 3692 | // First octet of 3 octet sequence. |
|
| 3693 | $mUcs4 = $in; |
|
| 3694 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| @@ 3697-3703 (lines=7) @@ | ||
| 3694 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| 3695 | $mState = 2; |
|
| 3696 | $mBytes = 3; |
|
| 3697 | } elseif (0xF0 === (0xF8 & $in)) { |
|
| 3698 | // First octet of 4 octet sequence. |
|
| 3699 | $mUcs4 = $in; |
|
| 3700 | $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
| 3701 | $mState = 3; |
|
| 3702 | $mBytes = 4; |
|
| 3703 | } elseif (0xF8 === (0xFC & $in)) { |
|
| 3704 | /* First octet of 5 octet sequence. |
|
| 3705 | * |
|
| 3706 | * This is illegal because the encoded codepoint must be either |
|
| @@ 3716-3722 (lines=7) @@ | ||
| 3713 | $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
| 3714 | $mState = 4; |
|
| 3715 | $mBytes = 5; |
|
| 3716 | } elseif (0xFC === (0xFE & $in)) { |
|
| 3717 | // First octet of 6 octet sequence, see comments for 5 octet sequence. |
|
| 3718 | $mUcs4 = $in; |
|
| 3719 | $mUcs4 = ($mUcs4 & 1) << 30; |
|
| 3720 | $mState = 5; |
|
| 3721 | $mBytes = 6; |
|
| 3722 | } else { |
|
| 3723 | /* Current octet is neither in the US-ASCII range nor a legal first |
|
| 3724 | * octet of a multi-octet sequence. |
|
| 3725 | */ |
|