|
@@ 2078-2089 (lines=12) @@
|
| 2075 |
|
$buf .= $cc1 . $cc2; |
| 2076 |
|
} |
| 2077 |
|
|
| 2078 |
|
} elseif ($c1 >= "\xe0" & $c1 <= "\xef") { // looks like 3 bytes UTF8 |
| 2079 |
|
|
| 2080 |
|
if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf") { // yeah, almost sure it's UTF8 already |
| 2081 |
|
$buf .= $c1 . $c2 . $c3; |
| 2082 |
|
$i += 2; |
| 2083 |
|
} else { // not valid UTF8 - convert it |
| 2084 |
|
$cc1 = (chr(ord($c1) / 64) | "\xc0"); |
| 2085 |
|
$cc2 = ($c1 & "\x3f") | "\x80"; |
| 2086 |
|
$buf .= $cc1 . $cc2; |
| 2087 |
|
} |
| 2088 |
|
|
| 2089 |
|
} elseif ($c1 >= "\xf0" & $c1 <= "\xf7") { // looks like 4 bytes UTF8 |
| 2090 |
|
|
| 2091 |
|
if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf") { // yeah, almost sure it's UTF8 already |
| 2092 |
|
$buf .= $c1 . $c2 . $c3 . $c4; |
|
@@ 2091-2098 (lines=8) @@
|
| 2088 |
|
|
| 2089 |
|
} elseif ($c1 >= "\xf0" & $c1 <= "\xf7") { // looks like 4 bytes UTF8 |
| 2090 |
|
|
| 2091 |
|
if ($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf") { // yeah, almost sure it's UTF8 already |
| 2092 |
|
$buf .= $c1 . $c2 . $c3 . $c4; |
| 2093 |
|
$i += 3; |
| 2094 |
|
} else { // not valid UTF8 - convert it |
| 2095 |
|
$cc1 = (chr(ord($c1) / 64) | "\xc0"); |
| 2096 |
|
$cc2 = ($c1 & "\x3f") | "\x80"; |
| 2097 |
|
$buf .= $cc1 . $cc2; |
| 2098 |
|
} |
| 2099 |
|
|
| 2100 |
|
} else { // doesn't look like UTF8, but should be converted |
| 2101 |
|
$cc1 = (chr(ord($c1) / 64) | "\xc0"); |