| @@ 2756-2762 (lines=7) @@ | ||
| 2753 | if (0 === (0x80 & $in)) { |
|
| 2754 | // US-ASCII, pass straight through. |
|
| 2755 | $mBytes = 1; |
|
| 2756 | } elseif (0xC0 === (0xE0 & $in)) { |
|
| 2757 | // First octet of 2 octet sequence. |
|
| 2758 | $mUcs4 = $in; |
|
| 2759 | $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
| 2760 | $mState = 1; |
|
| 2761 | $mBytes = 2; |
|
| 2762 | } elseif (0xE0 === (0xF0 & $in)) { |
|
| 2763 | // First octet of 3 octet sequence. |
|
| 2764 | $mUcs4 = $in; |
|
| 2765 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| @@ 2768-2774 (lines=7) @@ | ||
| 2765 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| 2766 | $mState = 2; |
|
| 2767 | $mBytes = 3; |
|
| 2768 | } elseif (0xF0 === (0xF8 & $in)) { |
|
| 2769 | // First octet of 4 octet sequence. |
|
| 2770 | $mUcs4 = $in; |
|
| 2771 | $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
| 2772 | $mState = 3; |
|
| 2773 | $mBytes = 4; |
|
| 2774 | } elseif (0xF8 === (0xFC & $in)) { |
|
| 2775 | /* First octet of 5 octet sequence. |
|
| 2776 | * |
|
| 2777 | * This is illegal because the encoded codepoint must be either |
|
| @@ 2787-2793 (lines=7) @@ | ||
| 2784 | $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
| 2785 | $mState = 4; |
|
| 2786 | $mBytes = 5; |
|
| 2787 | } elseif (0xFC === (0xFE & $in)) { |
|
| 2788 | // First octet of 6 octet sequence, see comments for 5 octet sequence. |
|
| 2789 | $mUcs4 = $in; |
|
| 2790 | $mUcs4 = ($mUcs4 & 1) << 30; |
|
| 2791 | $mState = 5; |
|
| 2792 | $mBytes = 6; |
|
| 2793 | } else { |
|
| 2794 | /* Current octet is neither in the US-ASCII range nor a legal first |
|
| 2795 | * octet of a multi-octet sequence. |
|
| 2796 | */ |
|