|
@@ 1773-1791 (lines=19) @@
|
| 1770 |
|
* |
| 1771 |
|
* @return string |
| 1772 |
|
*/ |
| 1773 |
|
public static function fix_simple_utf8($str)
{
    // Parallel search/replace lists taken from the keys and values of
    // self::$brokenUtf8ToUtf8. They are built on the first call that gets
    // past the empty-string guard and reused by every later call.
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    // An empty string has nothing to replace.
    if ($str === '') {
        return '';
    }

    if ($search === null) {
        $search = array_keys(self::$brokenUtf8ToUtf8);
        $replace = array_values(self::$brokenUtf8ToUtf8);
    }

    // Swap every occurrence of a map key for its corresponding map value.
    return str_replace($search, $replace, $str);
}
| 1792 |
|
|
| 1793 |
|
/** |
| 1794 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3393-3411 (lines=19) @@
|
| 3390 |
|
* |
| 3391 |
|
* @return string |
| 3392 |
|
*/ |
| 3393 |
|
public static function normalize_msword($str)
{
    // Parallel search/replace lists taken from the keys and values of
    // self::$utf8MSWord. They are built on the first call that gets past
    // the empty-string guard and reused by every later call.
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    // An empty string has nothing to replace.
    if ($str === '') {
        return '';
    }

    if ($search === null) {
        $search = array_keys(self::$utf8MSWord);
        $replace = array_values(self::$utf8MSWord);
    }

    // Swap every occurrence of a map key for its corresponding map value.
    return str_replace($search, $replace, $str);
}
| 3412 |
|
|
| 3413 |
|
/** |
| 3414 |
|
* Normalize the whitespace. |