fisharebest /
webtrees
| 1 | <?php |
||
| 2 | |||
| 3 | /** |
||
| 4 | * webtrees: online genealogy |
||
| 5 | * Copyright (C) 2025 webtrees development team |
||
| 6 | * This program is free software: you can redistribute it and/or modify |
||
| 7 | * it under the terms of the GNU General Public License as published by |
||
| 8 | * the Free Software Foundation, either version 3 of the License, or |
||
| 9 | * (at your option) any later version. |
||
| 10 | * This program is distributed in the hope that it will be useful, |
||
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
| 13 | * GNU General Public License for more details. |
||
| 14 | * You should have received a copy of the GNU General Public License |
||
| 15 | * along with this program. If not, see <https://www.gnu.org/licenses/>. |
||
| 16 | */ |
||
| 17 | |||
| 18 | declare(strict_types=1); |
||
| 19 | |||
| 20 | namespace Fisharebest\Webtrees\Encodings; |
||
| 21 | |||
| 22 | use function array_flip; |
||
| 23 | use function array_map; |
||
| 24 | use function implode; |
||
| 25 | use function ord; |
||
| 26 | use function preg_split; |
||
| 27 | use function strlen; |
||
| 28 | use function strrpos; |
||
| 29 | use function strtr; |
||
| 30 | |||
| 31 | use const PREG_SPLIT_NO_EMPTY; |
||
| 32 | |||
| 33 | /** |
||
| 34 | * Convert between an encoding and UTF-8. |
||
| 35 | */ |
||
abstract class AbstractEncoding implements EncodingInterface
{
    /** Written by fromUtf8() in place of any non-ASCII character that has no entry in TO_UTF8. */
    protected const string REPLACEMENT_CHARACTER = '?';

    /** @var array<string,string> Encoded character => utf8 character */
    protected const array TO_UTF8 = [];

    /**
     * Convert a string from UTF-8 to another encoding.
     *
     * ASCII characters (first byte below 128) are copied unchanged. Every other
     * character is looked up in the inverse of TO_UTF8; characters without an
     * entry become REPLACEMENT_CHARACTER. Malformed UTF-8 input is tolerated:
     * each malformed byte sequence is converted to one REPLACEMENT_CHARACTER.
     *
     * @param string $text
     *
     * @return string
     */
    public function fromUtf8(string $text): string
    {
        // Resolve the late-static-bound constant once, outside the closure.
        $replacement = static::REPLACEMENT_CHARACTER;

        // Invert the table: utf8 character => encoded character.
        $utf8 = array_flip(static::TO_UTF8);
        $utf8[UTF8::REPLACEMENT_CHARACTER] = $replacement;

        $chars = array_map(static function (string $char) use ($utf8, $replacement): string {
            // ASCII passes through unchanged. (In the malformed-input fallback, a chunk
            // may be a whole run of ASCII bytes, which is also copied verbatim.)
            if (ord($char[0]) < 128) {
                return $char;
            }

            return $utf8[$char] ?? $replacement;
        }, self::splitCharacters($text));

        return implode('', $chars);
    }

    /**
     * Convert a string from another encoding to UTF-8.
     *
     * Each occurrence of a TO_UTF8 key in the text is replaced by its UTF-8 value;
     * anything not listed in TO_UTF8 is left untouched.
     *
     * @param string $text
     *
     * @return string
     */
    public function toUtf8(string $text): string
    {
        return strtr($text, static::TO_UTF8);
    }

    /**
     * When reading multi-byte encodings using a stream, we must avoid incomplete characters.
     *
     * Looks for the encoded form of "\n", then "\r", then a space - in that order.
     * For the first of these that occurs in the text, returns the offset just
     * after its last occurrence. Returns 0 if none of them occurs.
     *
     * @param string $text
     *
     * @return int
     */
    public function convertibleBytes(string $text): int
    {
        $safe_chars = [
            $this->fromUtf8("\n"),
            $this->fromUtf8("\r"),
            $this->fromUtf8(' '),
        ];

        foreach ($safe_chars as $char) {
            $pos = strrpos($text, $char);

            if ($pos !== false) {
                return $pos + strlen($char);
            }
        }

        return 0;
    }

    /**
     * Split text into UTF-8 characters, tolerating malformed input.
     *
     * Well-formed UTF-8 is split into individual characters. If the text is not
     * well-formed UTF-8, the unicode-aware split fails, so the text is split at
     * byte level instead: into runs of ASCII bytes, lead bytes together with the
     * continuation bytes that follow them, and stray continuation bytes.
     *
     * @param string $text
     *
     * @return array<int,string>
     *
     * @throws \RuntimeException If the byte-level split also fails (not expected in practice).
     */
    private static function splitCharacters(string $text): array
    {
        $chars = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);

        if ($chars !== false) {
            return $chars;
        }

        // Without the "u" modifier the subject is not validated, so this works on any bytes.
        // Every byte >= 0x80 is matched by the (captured) delimiter; only ASCII runs remain between.
        $chunks = preg_split(
            '/([\xC0-\xDF][\x80-\xBF]?|[\xE0-\xEF][\x80-\xBF]{0,2}|[\xF0-\xFF][\x80-\xBF]{0,3}|[\x80-\xBF])/',
            $text,
            -1,
            PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
        );

        if ($chunks === false) {
            throw new \RuntimeException('Unable to split text into characters: ' . \preg_last_error_msg());
        }

        return $chunks;
    }
}
||
| 105 |