|
@@ 1733-1751 (lines=19) @@
|
| 1730 |
|
* |
| 1731 |
|
* @return string |
| 1732 |
|
*/ |
| 1733 |
|
public static function fix_simple_utf8($str)
{
    // Parallel search/replace lists for str_replace(); filled on the first
    // call that reaches the lookup and reused by later calls.
    static $searchList = null;
    static $replaceList = null;

    // Work on a string version of whatever the caller passed in.
    $text = (string)$str;

    // An empty string has nothing to replace.
    if ($text === '') {
        return '';
    }

    // Split the self::$brokenUtf8ToUtf8 map into its keys (search) and
    // values (replacement) once, instead of on every call.
    if (null === $searchList) {
        $replaceList = array_values(self::$brokenUtf8ToUtf8);
        $searchList = array_keys(self::$brokenUtf8ToUtf8);
    }

    // Every occurrence of a map key is replaced by the matching map value.
    // NOTE: str_replace() applies the pairs in order, so a later pair can
    // also match text produced by an earlier one.
    return str_replace($searchList, $replaceList, $text);
}
| 1752 |
|
|
| 1753 |
|
/** |
| 1754 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3302-3320 (lines=19) @@
|
| 3299 |
|
* |
| 3300 |
|
* @return string |
| 3301 |
|
*/ |
| 3302 |
|
public static function normalize_msword($str)
{
    // Parallel search/replace lists for str_replace(); filled on the first
    // call that reaches the lookup and reused by later calls.
    static $needles = null;
    static $substitutes = null;

    // Work on a string version of whatever the caller passed in.
    $text = (string)$str;

    // An empty string has nothing to normalize.
    if ($text === '') {
        return '';
    }

    // Split the self::$utf8MSWord map into its keys (search) and values
    // (replacement) once, instead of on every call.
    if (null === $needles) {
        $substitutes = array_values(self::$utf8MSWord);
        $needles = array_keys(self::$utf8MSWord);
    }

    // Every occurrence of a map key is replaced by the matching map value.
    // NOTE: str_replace() applies the pairs in order, so a later pair can
    // also match text produced by an earlier one.
    return str_replace($needles, $substitutes, $text);
}
| 3321 |
|
|
| 3322 |
|
/** |
| 3323 |
|
* Normalize the whitespace. |