| @@ 872-878 (lines=7) @@ | ||
| 869 | ||
| 870 | function lx_seemsUtf8($Str) { # by bmorel at ssi dot fr |
|
| 871 | for ($i=0; $i<strlen($Str); $i++) { |
|
| 872 | if (ord($Str[$i]) < 0x80) continue; # 0bbbbbbb |
|
| 873 | elseif ((ord($Str[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb |
|
| 874 | elseif ((ord($Str[$i]) & 0xF0) == 0xE0) $n=2; # 1110bbbb |
|
| 875 | elseif ((ord($Str[$i]) & 0xF8) == 0xF0) $n=3; # 11110bbb |
|
| 876 | elseif ((ord($Str[$i]) & 0xFC) == 0xF8) $n=4; # 111110bb |
|
| 877 | elseif ((ord($Str[$i]) & 0xFE) == 0xFC) $n=5; # 1111110b |
|
| 878 | else return false; # Does not match any model |
|
| 879 | for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ? |
|
| 880 | if ((++$i == strlen($Str)) || ((ord($Str[$i]) & 0xC0) != 0x80)) |
|
| 881 | return false; |
|
| @@ 1287-1307 (lines=21) @@ | ||
| 1284 | public static function isUtf8($Str) |
|
| 1285 | { # by bmorel at ssi dot fr |
|
| 1286 | for ($i = 0, $iMax = strlen($Str); $i < $iMax; ++$i) { |
|
| 1287 | if (ord($Str[$i]) < 0x80) { |
|
| 1288 | continue; |
|
| 1289 | } # 0bbbbbbb |
|
| 1290 | elseif ((ord($Str[$i]) & 0xE0) == 0xC0) { |
|
| 1291 | $n = 1; |
|
| 1292 | } # 110bbbbb |
|
| 1293 | elseif ((ord($Str[$i]) & 0xF0) == 0xE0) { |
|
| 1294 | $n = 2; |
|
| 1295 | } # 1110bbbb |
|
| 1296 | elseif ((ord($Str[$i]) & 0xF8) == 0xF0) { |
|
| 1297 | $n = 3; |
|
| 1298 | } # 11110bbb |
|
| 1299 | elseif ((ord($Str[$i]) & 0xFC) == 0xF8) { |
|
| 1300 | $n = 4; |
|
| 1301 | } # 111110bb |
|
| 1302 | elseif ((ord($Str[$i]) & 0xFE) == 0xFC) { |
|
| 1303 | $n = 5; |
|
| 1304 | } # 1111110b |
|
| 1305 | else { |
|
| 1306 | return false; |
|
| 1307 | } # Does not match any model |
|
| 1308 | for ($j = 0; $j < $n; ++$j) { # n bytes matching 10bbbbbb follow ? |
|
| 1309 | if ((++$i == strlen($Str)) || ((ord($Str[$i]) & 0xC0) != 0x80)) { |
|
| 1310 | return false; |
|