@@ 3178-3184 (lines=7) @@ | ||
3175 | if (0 === (0x80 & $in)) { |
|
3176 | // US-ASCII, pass straight through. |
|
3177 | $mBytes = 1; |
|
3178 | } elseif (0xC0 === (0xE0 & $in)) { |
|
3179 | // First octet of 2 octet sequence. |
|
3180 | $mUcs4 = $in; |
|
3181 | $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
3182 | $mState = 1; |
|
3183 | $mBytes = 2; |
|
3184 | } elseif (0xE0 === (0xF0 & $in)) { |
|
3185 | // First octet of 3 octet sequence. |
|
3186 | $mUcs4 = $in; |
|
3187 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
@@ 3190-3196 (lines=7) @@ | ||
3187 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
3188 | $mState = 2; |
|
3189 | $mBytes = 3; |
|
3190 | } elseif (0xF0 === (0xF8 & $in)) { |
|
3191 | // First octet of 4 octet sequence. |
|
3192 | $mUcs4 = $in; |
|
3193 | $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
3194 | $mState = 3; |
|
3195 | $mBytes = 4; |
|
3196 | } elseif (0xF8 === (0xFC & $in)) { |
|
3197 | /* First octet of 5 octet sequence. |
|
3198 | * |
|
3199 | * This is illegal because the encoded codepoint must be either |
|
@@ 3209-3215 (lines=7) @@ | ||
3206 | $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
3207 | $mState = 4; |
|
3208 | $mBytes = 5; |
|
3209 | } elseif (0xFC === (0xFE & $in)) { |
|
3210 | // First octet of 6 octet sequence, see comments for 5 octet sequence. |
|
3211 | $mUcs4 = $in; |
|
3212 | $mUcs4 = ($mUcs4 & 1) << 30; |
|
3213 | $mState = 5; |
|
3214 | $mBytes = 6; |
|
3215 | } else { |
|
3216 | /* Current octet is neither in the US-ASCII range nor a legal first |
|
3217 | * octet of a multi-octet sequence. |
|
3218 | */ |