|
@@ 7138-7143 (lines=6) @@
|
| 7135 |
|
if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already |
| 7136 |
|
$buf .= $c1 . $c2; |
| 7137 |
|
$i++; |
| 7138 |
|
} else { // not valid UTF8 - convert it |
| 7139 |
|
$cc1tmp = ord($c1) / 64; |
| 7140 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 7141 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 7142 |
|
$buf .= $cc1 . $cc2; |
| 7143 |
|
} |
| 7144 |
|
|
| 7145 |
|
} elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8 |
| 7146 |
|
|
|
@@ 7153-7158 (lines=6) @@
|
| 7150 |
|
if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already |
| 7151 |
|
$buf .= $c1 . $c2 . $c3; |
| 7152 |
|
$i += 2; |
| 7153 |
|
} else { // not valid UTF8 - convert it |
| 7154 |
|
$cc1tmp = ord($c1) / 64; |
| 7155 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 7156 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 7157 |
|
$buf .= $cc1 . $cc2; |
| 7158 |
|
} |
| 7159 |
|
|
| 7160 |
|
} elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8 |
| 7161 |
|
|
|
@@ 7169-7174 (lines=6) @@
|
| 7166 |
|
if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already |
| 7167 |
|
$buf .= $c1 . $c2 . $c3 . $c4; |
| 7168 |
|
$i += 3; |
| 7169 |
|
} else { // not valid UTF8 - convert it |
| 7170 |
|
$cc1tmp = ord($c1) / 64; |
| 7171 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 7172 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 7173 |
|
$buf .= $cc1 . $cc2; |
| 7174 |
|
} |
| 7175 |
|
|
| 7176 |
|
} else { // doesn't look like UTF8, but should be converted |
| 7177 |
|
$cc1tmp = ord($c1) / 64; |
|
@@ 7176-7181 (lines=6) @@
|
| 7173 |
|
$buf .= $cc1 . $cc2; |
| 7174 |
|
} |
| 7175 |
|
|
| 7176 |
|
} else { // doesn't look like UTF8, but should be converted |
| 7177 |
|
$cc1tmp = ord($c1) / 64; |
| 7178 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 7179 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 7180 |
|
$buf .= $cc1 . $cc2; |
| 7181 |
|
} |
| 7182 |
|
|
| 7183 |
|
} elseif (($c1 & "\xC0") === "\x80") { // needs conversion |
| 7184 |
|
|
|
@@ 7188-7192 (lines=5) @@
|
| 7185 |
|
$ordC1 = ord($c1); |
| 7186 |
|
if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases |
| 7187 |
|
$buf .= self::$WIN1252_TO_UTF8[$ordC1]; |
| 7188 |
|
} else { |
| 7189 |
|
$cc1 = self::chr_and_parse_int($ordC1 / 64) | "\xC0"; |
| 7190 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 7191 |
|
$buf .= $cc1 . $cc2; |
| 7192 |
|
} |
| 7193 |
|
|
| 7194 |
|
} else { // it doesn't need conversion |
| 7195 |
|
$buf .= $c1; |