|
@@ 1849-1867 (lines=19) @@
|
| 1846 |
|
* |
| 1847 |
|
* @return string |
| 1848 |
|
*/ |
| 1849 |
|
public static function fix_simple_utf8($str)
{
  // Search/replace lists taken from self::$BROKEN_UTF8_FIX; they are built
  // on the first call that reaches the replacement step and reused afterwards.
  static $searchList = null;
  static $replaceList = null;

  // Always operate on a string, whatever type the caller passed in.
  $str = (string)$str;

  // An empty string has nothing to replace.
  if ($str === '') {
    return '';
  }

  if ($searchList === null) {
    // Keys are the byte sequences to look for, values are their replacements.
    $searchList = array_keys(self::$BROKEN_UTF8_FIX);
    $replaceList = array_values(self::$BROKEN_UTF8_FIX);
  }

  $fixed = str_replace($searchList, $replaceList, $str);

  return $fixed;
}
| 1868 |
|
|
| 1869 |
|
/** |
| 1870 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3681-3699 (lines=19) @@
|
| 3678 |
|
* |
| 3679 |
|
* @return string |
| 3680 |
|
*/ |
| 3681 |
|
public static function normalize_msword($str)
{
  // Search/replace lists taken from self::$UTF8_MSWORD; they are built on
  // the first call that reaches the replacement step and reused afterwards.
  static $msWordSearch = null;
  static $msWordReplace = null;

  // Always operate on a string, whatever type the caller passed in.
  $str = (string)$str;

  // An empty string has nothing to normalize.
  if ($str === '') {
    return '';
  }

  if ($msWordSearch === null) {
    // Keys are the sequences to look for, values are their replacements.
    $msWordSearch = array_keys(self::$UTF8_MSWORD);
    $msWordReplace = array_values(self::$UTF8_MSWORD);
  }

  $normalized = str_replace($msWordSearch, $msWordReplace, $str);

  return $normalized;
}
| 3699 |
|
|
| 3700 |
|
/** |
| 3701 |
|
* Normalize the whitespace. |
| 3702 |
|
* |