|
@@ 1226-1246 (lines=21) @@
|
| 1223 |
|
* |
| 1224 |
|
* @return string |
| 1225 |
|
*/ |
| 1226 |
|
public static function fix_simple_utf8(string $str): string
{
    // Nothing to repair in an empty string.
    if ($str === '') {
        return '';
    }

    // Search/replace lists are derived once and kept for later calls.
    static $searchList = null;
    static $replaceList = null;

    if ($searchList === null) {
        // Load the shared replacement table on first use.
        // NOTE(review): presumably maps broken byte sequences to their
        // intended UTF-8 characters; confirm against the 'utf8_fix' data.
        $map = self::$BROKEN_UTF8_FIX;
        if ($map === null) {
            $map = self::getData('utf8_fix');
            self::$BROKEN_UTF8_FIX = $map;
        }

        $searchList = \array_keys($map);
        $replaceList = \array_values($map);
    }

    // Every occurrence of a table key is swapped for its paired value.
    return \str_replace($searchList, $replaceList, $str);
}
| 1247 |
|
|
| 1248 |
|
/** |
| 1249 |
|
* Fix a double (or multiple) encoded UTF8 string. |
|
@@ 3044-3065 (lines=22) @@
|
| 3041 |
|
* |
| 3042 |
|
* @return string |
| 3043 |
|
*/ |
| 3044 |
|
public static function normalize_msword(string $str): string
{
    // Nothing to normalize in an empty string.
    if ($str === '') {
        return '';
    }

    // Search/replace lists are derived once and kept for later calls.
    static $searchList = null;
    static $replaceList = null;

    if ($searchList === null) {
        // Load the shared replacement table on first use.
        // NOTE(review): presumably maps MS Word typographic characters to
        // plain equivalents; confirm against the 'utf8_msword' data.
        $map = self::$UTF8_MSWORD;
        if ($map === null) {
            $map = self::getData('utf8_msword');
            self::$UTF8_MSWORD = $map;
        }

        $searchList = \array_keys($map);
        $replaceList = \array_values($map);
    }

    // Every occurrence of a table key is swapped for its paired value.
    return \str_replace($searchList, $replaceList, $str);
}
| 3065 |
|
|
| 3066 |
|
/** |
| 3067 |
|
* Normalize the whitespace. |
| 3068 |
|
* |