|
@@ 1531-1551 (lines=21) @@
|
| 1528 |
|
* |
| 1529 |
|
* @return string |
| 1530 |
|
*/ |
| 1531 |
|
public static function fix_simple_utf8(string $str): string
{
    // An empty input has nothing to replace.
    if ($str === '') {
        return '';
    }

    // Search/replace lists are derived once per request and then reused,
    // so the lookup table is only split into keys and values a single time.
    static $search = null;
    static $replace = null;

    if ($search === null) {
        // Load the 'utf8_fix' table on first use and keep it on the class.
        // NOTE(review): presumably maps broken byte sequences to their
        // correct UTF-8 form - confirm against the data file.
        $map = self::$BROKEN_UTF8_FIX;
        if ($map === null) {
            $map = self::getData('utf8_fix');
            self::$BROKEN_UTF8_FIX = $map;
        }

        $search = \array_keys($map);
        $replace = \array_values($map);
    }

    // Every table key found in $str is swapped for the matching table value.
    return \str_replace($search, $replace, $str);
}
| 1552 |
|
|
| 1553 |
|
/** |
| 1554 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3673-3694 (lines=22) @@
|
| 3670 |
|
* |
| 3671 |
|
* @return string |
| 3672 |
|
*/ |
| 3673 |
|
public static function normalize_msword(string $str): string
{
    // An empty input has nothing to replace.
    if ($str === '') {
        return '';
    }

    // Search/replace lists are derived once per request and then reused,
    // so the lookup table is only split into keys and values a single time.
    static $search = null;
    static $replace = null;

    if ($search === null) {
        // Load the 'utf8_msword' table on first use and keep it on the class.
        // NOTE(review): presumably maps MS Word typographic characters to
        // plain equivalents - confirm against the data file.
        $map = self::$UTF8_MSWORD;
        if ($map === null) {
            $map = self::getData('utf8_msword');
            self::$UTF8_MSWORD = $map;
        }

        $search = \array_keys($map);
        $replace = \array_values($map);
    }

    // Every table key found in $str is swapped for the matching table value.
    return \str_replace($search, $replace, $str);
}
| 3694 |
|
|
| 3695 |
|
/** |
| 3696 |
|
* Normalize the whitespace. |
| 3697 |
|
* |