|
@@ 1719-1737 (lines=19) @@
|
| 1716 |
|
* |
| 1717 |
|
* @return string |
| 1718 |
|
*/ |
| 1719 |
|
public static function fix_simple_utf8($str)
{
    // Always operate on a string, whatever type the caller passed in.
    $str = (string)$str;

    // Nothing to replace in an empty string.
    if ($str === '') {
        return '';
    }

    // Search/replace lists derived from self::$brokenUtf8ToUtf8. They are
    // built on the first call and kept in static locals for later calls.
    static $searchList = null;
    static $replaceList = null;

    if ($searchList === null) {
        $searchList = array_keys(self::$brokenUtf8ToUtf8);
        $replaceList = array_values(self::$brokenUtf8ToUtf8);
    }

    // Swap every occurrence of a table key for its corresponding value.
    $fixed = str_replace($searchList, $replaceList, $str);

    return $fixed;
}
| 1738 |
|
|
| 1739 |
|
/** |
| 1740 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3288-3306 (lines=19) @@
|
| 3285 |
|
* |
| 3286 |
|
* @return string |
| 3287 |
|
*/ |
| 3288 |
|
public static function normalize_msword($str)
{
    // Always operate on a string, whatever type the caller passed in.
    $str = (string)$str;

    // Nothing to replace in an empty string.
    if ($str === '') {
        return '';
    }

    // Search/replace lists derived from self::$utf8MSWord. They are built
    // on the first call and kept in static locals for later calls.
    static $searchList = null;
    static $replaceList = null;

    if ($searchList === null) {
        $searchList = array_keys(self::$utf8MSWord);
        $replaceList = array_values(self::$utf8MSWord);
    }

    // Swap every occurrence of a table key for its corresponding value.
    $normalized = str_replace($searchList, $replaceList, $str);

    return $normalized;
}
| 3307 |
|
|
| 3308 |
|
/** |
| 3309 |
|
* Normalize the whitespace. |