|
@@ 6692-6697 (lines=6) @@
|
| 6689 |
|
if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already |
| 6690 |
|
$buf .= $c1 . $c2; |
| 6691 |
|
$i++; |
| 6692 |
|
} else { // not valid UTF8 - convert it |
| 6693 |
|
$cc1tmp = ord($c1) / 64; |
| 6694 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 6695 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 6696 |
|
$buf .= $cc1 . $cc2; |
| 6697 |
|
} |
| 6698 |
|
|
| 6699 |
|
} elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8 |
| 6700 |
|
|
|
@@ 6707-6712 (lines=6) @@
|
| 6704 |
|
if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already |
| 6705 |
|
$buf .= $c1 . $c2 . $c3; |
| 6706 |
|
$i += 2; |
| 6707 |
|
} else { // not valid UTF8 - convert it |
| 6708 |
|
$cc1tmp = ord($c1) / 64; |
| 6709 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 6710 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 6711 |
|
$buf .= $cc1 . $cc2; |
| 6712 |
|
} |
| 6713 |
|
|
| 6714 |
|
} elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8 |
| 6715 |
|
|
|
@@ 6723-6728 (lines=6) @@
|
| 6720 |
|
if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already |
| 6721 |
|
$buf .= $c1 . $c2 . $c3 . $c4; |
| 6722 |
|
$i += 3; |
| 6723 |
|
} else { // not valid UTF8 - convert it |
| 6724 |
|
$cc1tmp = ord($c1) / 64; |
| 6725 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 6726 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 6727 |
|
$buf .= $cc1 . $cc2; |
| 6728 |
|
} |
| 6729 |
|
|
| 6730 |
|
} else { // doesn't look like UTF8, but should be converted |
| 6731 |
|
$cc1tmp = ord($c1) / 64; |
|
@@ 6730-6735 (lines=6) @@
|
| 6727 |
|
$buf .= $cc1 . $cc2; |
| 6728 |
|
} |
| 6729 |
|
|
| 6730 |
|
} else { // doesn't look like UTF8, but should be converted |
| 6731 |
|
$cc1tmp = ord($c1) / 64; |
| 6732 |
|
$cc1 = self::chr_and_parse_int($cc1tmp) | "\xC0"; |
| 6733 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 6734 |
|
$buf .= $cc1 . $cc2; |
| 6735 |
|
} |
| 6736 |
|
|
| 6737 |
|
} elseif (($c1 & "\xC0") === "\x80") { // needs conversion |
| 6738 |
|
|
|
@@ 6742-6746 (lines=5) @@
|
| 6739 |
|
$ordC1 = ord($c1); |
| 6740 |
|
if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases |
| 6741 |
|
$buf .= self::$WIN1252_TO_UTF8[$ordC1]; |
| 6742 |
|
} else { |
| 6743 |
|
$cc1 = self::chr_and_parse_int($ordC1 / 64) | "\xC0"; |
| 6744 |
|
$cc2 = ($c1 & "\x3F") | "\x80"; |
| 6745 |
|
$buf .= $cc1 . $cc2; |
| 6746 |
|
} |
| 6747 |
|
|
| 6748 |
|
} else { // it doesn't need conversion |
| 6749 |
|
$buf .= $c1; |