| @@ 2977-2983 (lines=7) @@ | ||
| 2974 | if (0 == (0x80 & $in)) { |
|
| 2975 | // US-ASCII, pass straight through. |
|
| 2976 | $mBytes = 1; |
|
| 2977 | } elseif (0xC0 == (0xE0 & $in)) { |
|
| 2978 | // First octet of 2 octet sequence. |
|
| 2979 | $mUcs4 = $in; |
|
| 2980 | $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
| 2981 | $mState = 1; |
|
| 2982 | $mBytes = 2; |
|
| 2983 | } elseif (0xE0 == (0xF0 & $in)) { |
|
| 2984 | // First octet of 3 octet sequence. |
|
| 2985 | $mUcs4 = $in; |
|
| 2986 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| @@ 2989-2995 (lines=7) @@ | ||
| 2986 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| 2987 | $mState = 2; |
|
| 2988 | $mBytes = 3; |
|
| 2989 | } elseif (0xF0 == (0xF8 & $in)) { |
|
| 2990 | // First octet of 4 octet sequence. |
|
| 2991 | $mUcs4 = $in; |
|
| 2992 | $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
| 2993 | $mState = 3; |
|
| 2994 | $mBytes = 4; |
|
| 2995 | } elseif (0xF8 == (0xFC & $in)) { |
|
| 2996 | /* First octet of 5 octet sequence. |
|
| 2997 | * |
|
| 2998 | * This is illegal because the encoded codepoint must be either |
|
| @@ 3008-3014 (lines=7) @@ | ||
| 3005 | $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
| 3006 | $mState = 4; |
|
| 3007 | $mBytes = 5; |
|
| 3008 | } elseif (0xFC == (0xFE & $in)) { |
|
| 3009 | // First octet of 6 octet sequence, see comments for 5 octet sequence. |
|
| 3010 | $mUcs4 = $in; |
|
| 3011 | $mUcs4 = ($mUcs4 & 1) << 30; |
|
| 3012 | $mState = 5; |
|
| 3013 | $mBytes = 6; |
|
| 3014 | } else { |
|
| 3015 | /* Current octet is neither in the US-ASCII range nor a legal first |
|
| 3016 | * octet of a multi-octet sequence. |
|
| 3017 | */ |
|