|
@@ 1828-1846 (lines=19) @@
|
| 1825 |
|
* |
| 1826 |
|
* @return string |
| 1827 |
|
*/ |
| 1828 |
|
public static function fix_simple_utf8($str)
{
    // Work on a string copy of the argument; an empty string has nothing to replace.
    $input = (string)$str;
    if ($input === '') {
        return '';
    }

    // The search/replace lists are derived from self::$BROKEN_UTF8_FIX on the
    // first call only and kept in a static local for every later call.
    static $replacementTable = null;
    if ($replacementTable === null) {
        $replacementTable = array(
            'search'  => array_keys(self::$BROKEN_UTF8_FIX),
            'replace' => array_values(self::$BROKEN_UTF8_FIX),
        );
    }

    // Swap every key of the map for its corresponding value.
    return str_replace(
        $replacementTable['search'],
        $replacementTable['replace'],
        $input
    );
}
| 1847 |
|
|
| 1848 |
|
/** |
| 1849 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3594-3612 (lines=19) @@
|
| 3591 |
|
* |
| 3592 |
|
* @return string |
| 3593 |
|
*/ |
| 3594 |
|
public static function normalize_msword($str)
{
    // Cast first so the emptiness check and the replacement both see a string.
    $text = (string)$str;
    if ($text === '') {
        return '';
    }

    // Build the parallel search/replace lists from self::$UTF8_MSWORD once;
    // the static local keeps them around for subsequent calls.
    static $mswordTable = null;
    if ($mswordTable === null) {
        $mswordTable = array(
            'from' => array_keys(self::$UTF8_MSWORD),
            'to'   => array_values(self::$UTF8_MSWORD),
        );
    }

    // Each key of the map found in the text is replaced by the map's value for it.
    return str_replace($mswordTable['from'], $mswordTable['to'], $text);
}
| 3612 |
|
|
| 3613 |
|
/** |
| 3614 |
|
* Normalize the whitespace. |
| 3615 |
|
* |