|
@@ 1823-1841 (lines=19) @@
|
| 1820 |
|
* |
| 1821 |
|
* @return string |
| 1822 |
|
*/ |
| 1823 |
|
public static function fix_simple_utf8($str)
{
    // Search/replace lists derived from self::$BROKEN_UTF8_FIX; they are
    // filled on the first non-empty call and kept in static locals afterwards.
    static $brokenSequences = null;
    static $fixedSequences = null;

    $input = (string)$str;

    // Nothing to repair in an empty string.
    if ($input === '') {
        return '';
    }

    if (null === $brokenSequences) {
        // Keys are the sequences to look for, values are their replacements.
        $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
        $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $input);
}
| 1842 |
|
|
| 1843 |
|
/** |
| 1844 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3579-3597 (lines=19) @@
|
| 3576 |
|
* |
| 3577 |
|
* @return string |
| 3578 |
|
*/ |
| 3579 |
|
public static function normalize_msword($str)
{
    // Search/replace lists derived from self::$UTF8_MSWORD; they are
    // filled on the first non-empty call and kept in static locals afterwards.
    static $mswordSearch = null;
    static $mswordReplace = null;

    $input = (string)$str;

    // An empty string needs no normalization.
    if ($input === '') {
        return '';
    }

    if (null === $mswordSearch) {
        // Keys are the sequences to look for, values are their replacements.
        $mswordSearch = array_keys(self::$UTF8_MSWORD);
        $mswordReplace = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($mswordSearch, $mswordReplace, $input);
}
| 3598 |
|
|
| 3599 |
|
/** |
| 3600 |
|
* Normalize the whitespace. |