| @@ 1159-1165 (lines=7) @@ | ||
| 1156 | if (0 == (0x80 & $in)) { |
|
| 1157 | // US-ASCII, pass straight through. |
|
| 1158 | $mBytes = 1; |
|
| 1159 | } elseif (0xC0 == (0xE0 & $in)) { |
|
| 1160 | // First octet of 2 octet sequence. |
|
| 1161 | $mUcs4 = $in; |
|
| 1162 | $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
| 1163 | $mState = 1; |
|
| 1164 | $mBytes = 2; |
|
| 1165 | } elseif (0xE0 == (0xF0 & $in)) { |
|
| 1166 | // First octet of 3 octet sequence. |
|
| 1167 | $mUcs4 = $in; |
|
| 1168 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| @@ 1171-1177 (lines=7) @@ | ||
| 1168 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| 1169 | $mState = 2; |
|
| 1170 | $mBytes = 3; |
|
| 1171 | } elseif (0xF0 == (0xF8 & $in)) { |
|
| 1172 | // First octet of 4 octet sequence. |
|
| 1173 | $mUcs4 = $in; |
|
| 1174 | $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
| 1175 | $mState = 3; |
|
| 1176 | $mBytes = 4; |
|
| 1177 | } elseif (0xF8 == (0xFC & $in)) { |
|
| 1178 | /* First octet of 5 octet sequence. |
|
| 1179 | * |
|
| 1180 | * This is illegal because the encoded codepoint must be either |
|
| @@ 1190-1196 (lines=7) @@ | ||
| 1187 | $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
| 1188 | $mState = 4; |
|
| 1189 | $mBytes = 5; |
|
| 1190 | } elseif (0xFC == (0xFE & $in)) { |
|
| 1191 | // First octet of 6 octet sequence, see comments for 5 octet sequence. |
|
| 1192 | $mUcs4 = $in; |
|
| 1193 | $mUcs4 = ($mUcs4 & 1) << 30; |
|
| 1194 | $mState = 5; |
|
| 1195 | $mBytes = 6; |
|
| 1196 | } else { |
|
| 1197 | /* Current octet is neither in the US-ASCII range nor a legal first |
|
| 1198 | * octet of a multi-octet sequence. |
|
| 1199 | */ |
|