| @@ 174-180 (lines=7) @@ | ||
| 171 | // reset |
|
| 172 | $char = ''; |
|
| 173 | $mBytes = 1; |
|
| 174 | } elseif (0xC0 == (0xE0 & ($in))) { |
|
| 175 | // First octet of 2 octet sequence |
|
| 176 | $mUcs4 = ($in); |
|
| 177 | $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
| 178 | $mState = 1; |
|
| 179 | $mBytes = 2; |
|
| 180 | } elseif (0xE0 == (0xF0 & ($in))) { |
|
| 181 | // First octet of 3 octet sequence |
|
| 182 | $mUcs4 = ($in); |
|
| 183 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| @@ 186-192 (lines=7) @@ | ||
| 183 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
| 184 | $mState = 2; |
|
| 185 | $mBytes = 3; |
|
| 186 | } elseif (0xF0 == (0xF8 & ($in))) { |
|
| 187 | // First octet of 4 octet sequence |
|
| 188 | $mUcs4 = ($in); |
|
| 189 | $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
| 190 | $mState = 3; |
|
| 191 | $mBytes = 4; |
|
| 192 | } elseif (0xF8 == (0xFC & ($in))) { |
|
| 193 | // First octet of 5 octet sequence. |
|
| 194 | // |
|
| 195 | // This is illegal because the encoded codepoint must be |
|
| @@ 206-213 (lines=8) @@ | ||
| 203 | $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
| 204 | $mState = 4; |
|
| 205 | $mBytes = 5; |
|
| 206 | } elseif (0xFC == (0xFE & ($in))) { |
|
| 207 | // First octet of 6 octet sequence, see comments for 5 |
|
| 208 | // octet sequence. |
|
| 209 | $mUcs4 = ($in); |
|
| 210 | $mUcs4 = ($mUcs4 & 1) << 30; |
|
| 211 | $mState = 5; |
|
| 212 | $mBytes = 6; |
|
| 213 | } else { |
|
| 214 | // Current octet is neither in the US-ASCII range nor a |
|
| 215 | // legal first octet of a multi-octet sequence. |
|
| 216 | $mState = 0; |
|