|
@@ 1734-1752 (lines=19) @@
|
| 1731 |
|
* |
| 1732 |
|
* @return string |
| 1733 |
|
*/ |
| 1734 |
|
public static function fix_simple_utf8($str)
{
    // Work on a string version of the input; nothing to do when it is empty.
    $str = (string)$str;
    if ($str === '') {
        return '';
    }

    // The search / replacement lists are derived from self::$brokenUtf8ToUtf8
    // on the first non-empty call and kept in statics for later calls.
    static $searchList = null;
    static $replaceList = null;

    if ($searchList === null) {
        $brokenMap = self::$brokenUtf8ToUtf8;
        $searchList = array_keys($brokenMap);
        $replaceList = array_values($brokenMap);
    }

    // Each key of the map found in the input is replaced by its mapped value.
    $fixed = str_replace($searchList, $replaceList, $str);

    return $fixed;
}
| 1753 |
|
|
| 1754 |
|
/** |
| 1755 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3328-3346 (lines=19) @@
|
| 3325 |
|
* |
| 3326 |
|
* @return string |
| 3327 |
|
*/ |
| 3328 |
|
public static function normalize_msword($str)
{
    // Work on a string version of the input; nothing to do when it is empty.
    $str = (string)$str;
    if ($str === '') {
        return '';
    }

    // The search / replacement lists are derived from self::$utf8MSWord
    // on the first non-empty call and kept in statics for later calls.
    static $searchList = null;
    static $replaceList = null;

    if ($searchList === null) {
        $msWordMap = self::$utf8MSWord;
        $searchList = array_keys($msWordMap);
        $replaceList = array_values($msWordMap);
    }

    // Each key of the map found in the input is replaced by its mapped value.
    $normalized = str_replace($searchList, $replaceList, $str);

    return $normalized;
}
| 3347 |
|
|
| 3348 |
|
/** |
| 3349 |
|
* Normalize the whitespace. |