|
@@ 6798-6803 (lines=6) @@
|
| 6795 |
|
if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already |
| 6796 |
|
$buf .= $c1 . $c2; |
| 6797 |
|
$i++; |
| 6798 |
|
} else { // not valid UTF8 - convert it |
| 6799 |
|
$cc1tmp = ord($c1) / 64; |
| 6800 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 6801 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 6802 |
|
$buf .= $cc1 . $cc2; |
| 6803 |
|
} |
| 6804 |
|
|
| 6805 |
|
} elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8 |
| 6806 |
|
|
|
@@ 6813-6818 (lines=6) @@
|
| 6810 |
|
if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already |
| 6811 |
|
$buf .= $c1 . $c2 . $c3; |
| 6812 |
|
$i += 2; |
| 6813 |
|
} else { // not valid UTF8 - convert it |
| 6814 |
|
$cc1tmp = ord($c1) / 64; |
| 6815 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 6816 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 6817 |
|
$buf .= $cc1 . $cc2; |
| 6818 |
|
} |
| 6819 |
|
|
| 6820 |
|
} elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8 |
| 6821 |
|
|
|
@@ 6829-6834 (lines=6) @@
|
| 6826 |
|
if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already |
| 6827 |
|
$buf .= $c1 . $c2 . $c3 . $c4; |
| 6828 |
|
$i += 3; |
| 6829 |
|
} else { // not valid UTF8 - convert it |
| 6830 |
|
$cc1tmp = ord($c1) / 64; |
| 6831 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 6832 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 6833 |
|
$buf .= $cc1 . $cc2; |
| 6834 |
|
} |
| 6835 |
|
|
| 6836 |
|
} else { // doesn't look like UTF8, but should be converted |
| 6837 |
|
$cc1tmp = ord($c1) / 64; |
|
@@ 6836-6841 (lines=6) @@
|
| 6833 |
|
$buf .= $cc1 . $cc2; |
| 6834 |
|
} |
| 6835 |
|
|
| 6836 |
|
} else { // doesn't look like UTF8, but should be converted |
| 6837 |
|
$cc1tmp = ord($c1) / 64; |
| 6838 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 6839 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 6840 |
|
$buf .= $cc1 . $cc2; |
| 6841 |
|
} |
| 6842 |
|
|
| 6843 |
|
} elseif (($c1 & "\xC0") === "\x80") { // needs conversion |
| 6844 |
|
|
|
@@ 6848-6852 (lines=5) @@
|
| 6845 |
|
$ordC1 = ord($c1); |
| 6846 |
|
if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases |
| 6847 |
|
$buf .= self::$WIN1252_TO_UTF8[$ordC1]; |
| 6848 |
|
} else { |
| 6849 |
|
$cc1 = self::chr_and_parse_int($ordC1 / 64) | "\xC0"; |
| 6850 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 6851 |
|
$buf .= $cc1 . $cc2; |
| 6852 |
|
} |
| 6853 |
|
|
| 6854 |
|
} else { // it doesn't need conversion |
| 6855 |
|
$buf .= $c1; |