|
@@ 1816-1834 (lines=19) @@
|
| 1813 |
|
* |
| 1814 |
|
* @return string |
| 1815 |
|
*/ |
| 1816 |
|
public static function fix_simple_utf8($str)
{
    // Always operate on a string, whatever type the caller handed in.
    $str = (string)$str;

    // An empty string has nothing to repair.
    if ($str === '') {
        return '';
    }

    // Search and replacement lists taken from self::$BROKEN_UTF8_FIX.
    // They are static locals, so they are built on the first call that
    // reaches this point and reused by every later call.
    static $brokenSequences = null;
    static $repairedSequences = null;

    if ($brokenSequences === null) {
        $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
        $repairedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    // Swap each key of the map found in the input for its mapped value.
    $repaired = str_replace($brokenSequences, $repairedSequences, $str);

    return $repaired;
}
| 1835 |
|
|
| 1836 |
|
/** |
| 1837 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3492-3510 (lines=19) @@
|
| 3489 |
|
* |
| 3490 |
|
* @return string |
| 3491 |
|
*/ |
| 3492 |
|
public static function normalize_msword($str)
{
    // Always operate on a string, whatever type the caller handed in.
    $str = (string)$str;

    // An empty string has nothing to normalize.
    if ($str === '') {
        return '';
    }

    // Search and replacement lists taken from self::$UTF8_MSWORD.
    // They are static locals, so they are built on the first call that
    // reaches this point and reused by every later call.
    static $mswordSequences = null;
    static $plainSequences = null;

    if ($mswordSequences === null) {
        $mswordSequences = array_keys(self::$UTF8_MSWORD);
        $plainSequences = array_values(self::$UTF8_MSWORD);
    }

    // Swap each key of the map found in the input for its mapped value.
    $normalized = str_replace($mswordSequences, $plainSequences, $str);

    return $normalized;
}
| 3511 |
|
|
| 3512 |
|
/** |
| 3513 |
|
* Normalize the whitespace. |