@@ 174-180 (lines=7) @@ | ||
171 | // reset |
|
172 | $char = ''; |
|
173 | $mBytes = 1; |
|
174 | } elseif (0xC0 == (0xE0 & ($in))) { |
|
175 | // First octet of 2 octet sequence |
|
176 | $mUcs4 = ($in); |
|
177 | $mUcs4 = ($mUcs4 & 0x1F) << 6; |
|
178 | $mState = 1; |
|
179 | $mBytes = 2; |
|
180 | } elseif (0xE0 == (0xF0 & ($in))) { |
|
181 | // First octet of 3 octet sequence |
|
182 | $mUcs4 = ($in); |
|
183 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
@@ 186-192 (lines=7) @@ | ||
183 | $mUcs4 = ($mUcs4 & 0x0F) << 12; |
|
184 | $mState = 2; |
|
185 | $mBytes = 3; |
|
186 | } elseif (0xF0 == (0xF8 & ($in))) { |
|
187 | // First octet of 4 octet sequence |
|
188 | $mUcs4 = ($in); |
|
189 | $mUcs4 = ($mUcs4 & 0x07) << 18; |
|
190 | $mState = 3; |
|
191 | $mBytes = 4; |
|
192 | } elseif (0xF8 == (0xFC & ($in))) { |
|
193 | // First octet of 5 octet sequence. |
|
194 | // |
|
195 | // This is illegal because the encoded codepoint must be |
|
@@ 206-213 (lines=8) @@ | ||
203 | $mUcs4 = ($mUcs4 & 0x03) << 24; |
|
204 | $mState = 4; |
|
205 | $mBytes = 5; |
|
206 | } elseif (0xFC == (0xFE & ($in))) { |
|
207 | // First octet of 6 octet sequence, see comments for 5 |
|
208 | // octet sequence. |
|
209 | $mUcs4 = ($in); |
|
210 | $mUcs4 = ($mUcs4 & 1) << 30; |
|
211 | $mState = 5; |
|
212 | $mBytes = 6; |
|
213 | } else { |
|
214 | // Current octet is neither in the US-ASCII range nor a |
|
215 | // legal first octet of a multi-octet sequence. |
|
216 | $mState = 0; |