|
@@ 1787-1805 (lines=19) @@
|
| 1784 |
|
* |
| 1785 |
|
* @return string |
| 1786 |
|
*/ |
| 1787 |
|
public static function fix_simple_utf8($str)
{
    // Search / replace lists derived from self::$BROKEN_UTF8_FIX; they are
    // built on the first non-empty call and reused on every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    // Nothing to repair in an empty string.
    if ($str === '') {
        return '';
    }

    if ($brokenSequences === null) {
        // Keys are the sequences to look for, values are their replacements.
        $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
        $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
}
| 1806 |
|
|
| 1807 |
|
/** |
| 1808 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3428-3446 (lines=19) @@
|
| 3425 |
|
* |
| 3426 |
|
* @return string |
| 3427 |
|
*/ |
| 3428 |
|
public static function normalize_msword($str)
{
    // Search / replace lists derived from self::$UTF8_MSWORD; they are
    // built on the first non-empty call and reused on every later call.
    static $mswordSearch = null;
    static $mswordReplace = null;

    $str = (string)$str;

    // An empty string needs no normalisation.
    if ($str === '') {
        return '';
    }

    if ($mswordSearch === null) {
        // Keys are the sequences to look for, values are their replacements.
        $mswordSearch = array_keys(self::$UTF8_MSWORD);
        $mswordReplace = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($mswordSearch, $mswordReplace, $str);
}
| 3447 |
|
|
| 3448 |
|
/** |
| 3449 |
|
* Normalize the whitespace. |