|
@@ 1823-1841 (lines=19) @@
|
| 1820 |
|
* |
| 1821 |
|
* @return string |
| 1822 |
|
*/ |
| 1823 |
|
public static function fix_simple_utf8($str)
{
    // Search / replacement lists, derived from self::$BROKEN_UTF8_FIX on the
    // first non-empty call and kept in static storage for later calls.
    static $search = null;
    static $replace = null;

    // Always operate on a string; an empty one needs no processing.
    $str = (string)$str;
    if ($str === '') {
        return '';
    }

    if ($search === null) {
        // Keys of the table are the sequences to look for, values are what
        // they get replaced with (presumably broken byte sequences mapped to
        // their proper UTF-8 form -- confirm against the table definition).
        $search = array_keys(self::$BROKEN_UTF8_FIX);
        $replace = array_values(self::$BROKEN_UTF8_FIX);
    }

    // str_replace() pairs $search[i] with $replace[i] and applies the pairs
    // one after another, in table order.
    return str_replace($search, $replace, $str);
}
| 1842 |
|
|
| 1843 |
|
/** |
| 1844 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3499-3517 (lines=19) @@
|
| 3496 |
|
* |
| 3497 |
|
* @return string |
| 3498 |
|
*/ |
| 3499 |
|
public static function normalize_msword($str)
{
    // Needles and substitutes taken from self::$UTF8_MSWORD; filled on the
    // first non-empty call and reused afterwards.
    static $needles = null;
    static $substitutes = null;

    // Coerce the input to a string and bail out early when it is empty.
    $str = (string)$str;
    if ($str === '') {
        return '';
    }

    if ($needles === null) {
        // Table keys are the characters to find, table values their
        // replacements (presumably MS Word "smart" punctuation mapped to
        // plain equivalents -- confirm against the table definition).
        $needles = array_keys(self::$UTF8_MSWORD);
        $substitutes = array_values(self::$UTF8_MSWORD);
    }

    // Each needle is swapped for the substitute at the same index, pair by
    // pair in table order.
    return str_replace($needles, $substitutes, $str);
}
| 3518 |
|
|
| 3519 |
|
/** |
| 3520 |
|
* Normalize the whitespace. |