fisharebest /
webtrees
| 1 | <?php |
||
| 2 | |||
| 3 | /** |
||
| 4 | * webtrees: online genealogy |
||
| 5 | * Copyright (C) 2025 webtrees development team |
||
| 6 | * This program is free software: you can redistribute it and/or modify |
||
| 7 | * it under the terms of the GNU General Public License as published by |
||
| 8 | * the Free Software Foundation, either version 3 of the License, or |
||
| 9 | * (at your option) any later version. |
||
| 10 | * This program is distributed in the hope that it will be useful, |
||
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
| 13 | * GNU General Public License for more details. |
||
| 14 | * You should have received a copy of the GNU General Public License |
||
| 15 | * along with this program. If not, see <https://www.gnu.org/licenses/>. |
||
| 16 | */ |
||
| 17 | |||
| 18 | declare(strict_types=1); |
||
| 19 | |||
| 20 | namespace Fisharebest\Webtrees\Encodings; |
||
| 21 | |||
| 22 | use function preg_replace; |
||
| 23 | use function strtr; |
||
| 24 | |||
| 25 | /** |
||
| 26 | * Convert between UTF-8 and ANSEL encoding. |
||
| 27 | * |
||
| 28 | * ANSEL is the common name for the MARC-21 encoding, also known as Z39.47, which |
||
| 29 | * has a number of editions. These are denoted by a year suffix. |
||
| 30 | * |
||
| 31 | * The GEDCOM 5.5.1 specification (1999-10-02) specifies the Z39.47-1985 edition. |
||
| 32 | * It adds Es Zett (ß) at CF. |
||
| 33 | * |
||
| 34 | * According to wikipedia, other non-standard characters are also added. |
||
| 35 | * |
||
| 36 | * HEX Unicode Glyph Description |
||
| 37 | * BE 25A1 □ Empty box |
||
| 38 | * BF 25A0 ■ Black box |
||
| 39 | * CD 0065 e Midline e |
||
| 40 | * CE 006F o Midline o |
||
| 41 | * CF 00DF ß Es Zett |
||
| 42 | * FC 0338 / Combining slash |
||
| 43 | * |
||
| 44 | * @link https://en.wikipedia.org/wiki/ANSEL |
||
| 45 | * |
||
| 46 | * The MARC-21 specification has added a number of additional characters since |
||
| 47 | * the 1985 edition. |
||
| 48 | * |
||
| 49 | * HEX Unicode Glyph Description |
||
| 50 | * 88 0098 Start of string |
||
| 51 | * 89 009C String terminator |
||
| 52 | * 8D 200D Zero width joiner |
||
| 53 | * 8E 200C Zero width non-joiner |
||
| 54 | * A7 02B9 ʹ Single prime |
||
| 55 | * AC 01A0 Ơ LATIN CAPITAL LETTER O WITH HORN |
||
| 56 | * AD 01AF Ư LATIN CAPITAL LETTER U WITH HORN |
||
| 57 | * B7 02BA ʺ Double prime |
||
| 58 | * BC 01A1 ơ LATIN SMALL LETTER O WITH HORN |
||
| 59 | * BD 01B0 ư LATIN SMALL LETTER U WITH HORN |
||
| 60 | * C0 00B0 ° Degree sign |
||
| 61 | * C1 2113 ℓ Script small L |
||
| 62 | * C2 2117 ℗ Sound recording copyright |
||
| 63 | * C4 266F ♯ Music sharp sign |
||
| 64 | * C7 00DF ß Es Zett |
||
| 65 | * C8 20AC € Euro sign |
||
| 66 | * E0 0309 Hook above |
||
| 67 | * EB 0361 Breve (first part / double) |
||
| 68 | * EC 0361 Breve (second part) |
||
| 69 | * EF 0310 Candrabindu |
||
| 70 | * F2 0323 Low dot |
||
| 71 | * F3 0324 Diaeresis below |
||
| 72 | * F4 0325 Ring below |
||
| 73 | * F5 0333 Double underline |
||
| 74 | * F6 0332 Underline |
||
| 75 | * F8 031C Left half ring below |
||
| 76 | * F9 032E Breve below |
||
| 77 | * FA 0360 Double tilde (first part / double). |
||
| 78 | * FB 0360 Double tilde (second part). |
||
| 79 | * FF 0338 Slash |
||
| 80 | * |
||
| 81 | * @link https://memory.loc.gov/diglib/codetables/45.html |
||
| 82 | * |
||
| 83 | * Note that this means we can expect two different representations of Es Zett. |
||
| 84 | * |
||
| 85 | * There are two multi-part diacritics. There are two ways to represent these. |
||
| 86 | * |
||
| 87 | * ANSEL | UTF-8 | UTF-8 (preferred) |
||
| 88 | * ------------+---------------+----------------- |
||
| 89 | * FA x FB y | x FE22 y FE23 | x 0360 y |
||
| 90 | * EB x EC y | x FE20 y FE21 | x 0361 y |
||
| 91 | */ |
||
| 92 | class ANSEL extends AbstractEncoding |
||
| 93 | { |
||
| 94 | public const string NAME = 'ANSEL'; |
||
|
0 ignored issues
–
show
Bug
introduced
by
Loading history...
|
|||
| 95 | |||
/**
 * Map every ANSEL byte in the range 0x80-0xFF to its UTF-8 equivalent.
 *
 * - Bytes with no assigned character map to the replacement character.
 * - 0xEC and 0xFB are the second halves of the two-part diacritics. They map
 *   to the empty string, so that only the first half (0xEB / 0xFA) produces a
 *   combining double mark.
 * - Es Zett appears twice: at 0xC7 (MARC-21) and at 0xCF (the GEDCOM flavour of
 *   Z39.47-1985). Both decode to the same small letter sharp s.
 *
 * NOTE(review): the class documentation also lists the non-standard characters
 * CD (midline e), CE (midline o) and FC (combining slash). They are left as
 * replacement characters here - presumably on purpose; confirm before changing.
 */
protected const array TO_UTF8 = [
    // 0x80-0x9F: control characters.
    "\x80" => UTF8::REPLACEMENT_CHARACTER,
    "\x81" => UTF8::REPLACEMENT_CHARACTER,
    "\x82" => UTF8::REPLACEMENT_CHARACTER,
    "\x83" => UTF8::REPLACEMENT_CHARACTER,
    "\x84" => UTF8::REPLACEMENT_CHARACTER,
    "\x85" => UTF8::REPLACEMENT_CHARACTER,
    "\x86" => UTF8::REPLACEMENT_CHARACTER,
    "\x87" => UTF8::REPLACEMENT_CHARACTER,
    "\x88" => UTF8::START_OF_STRING,
    "\x89" => UTF8::STRING_TERMINATOR,
    "\x8A" => UTF8::REPLACEMENT_CHARACTER,
    "\x8B" => UTF8::REPLACEMENT_CHARACTER,
    "\x8C" => UTF8::REPLACEMENT_CHARACTER,
    "\x8D" => UTF8::ZERO_WIDTH_JOINER,
    "\x8E" => UTF8::ZERO_WIDTH_NON_JOINER,
    "\x8F" => UTF8::REPLACEMENT_CHARACTER,
    "\x90" => UTF8::REPLACEMENT_CHARACTER,
    "\x91" => UTF8::REPLACEMENT_CHARACTER,
    "\x92" => UTF8::REPLACEMENT_CHARACTER,
    "\x93" => UTF8::REPLACEMENT_CHARACTER,
    "\x94" => UTF8::REPLACEMENT_CHARACTER,
    "\x95" => UTF8::REPLACEMENT_CHARACTER,
    "\x96" => UTF8::REPLACEMENT_CHARACTER,
    "\x97" => UTF8::REPLACEMENT_CHARACTER,
    "\x98" => UTF8::REPLACEMENT_CHARACTER,
    "\x99" => UTF8::REPLACEMENT_CHARACTER,
    "\x9A" => UTF8::REPLACEMENT_CHARACTER,
    "\x9B" => UTF8::REPLACEMENT_CHARACTER,
    "\x9C" => UTF8::REPLACEMENT_CHARACTER,
    "\x9D" => UTF8::REPLACEMENT_CHARACTER,
    "\x9E" => UTF8::REPLACEMENT_CHARACTER,
    "\x9F" => UTF8::REPLACEMENT_CHARACTER,

    // 0xA0-0xDF: spacing characters (letters, symbols and punctuation).
    "\xA0" => UTF8::REPLACEMENT_CHARACTER,
    "\xA1" => UTF8::LATIN_CAPITAL_LETTER_L_WITH_STROKE,
    "\xA2" => UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE,
    "\xA3" => UTF8::LATIN_CAPITAL_LETTER_D_WITH_STROKE,
    "\xA4" => UTF8::LATIN_CAPITAL_LETTER_THORN,
    "\xA5" => UTF8::LATIN_CAPITAL_LETTER_AE,
    "\xA6" => UTF8::LATIN_CAPITAL_LIGATURE_OE,
    "\xA7" => UTF8::MODIFIER_LETTER_PRIME,
    "\xA8" => UTF8::MIDDLE_DOT,
    "\xA9" => UTF8::MUSIC_FLAT_SIGN,
    "\xAA" => UTF8::REGISTERED_SIGN,
    "\xAB" => UTF8::PLUS_MINUS_SIGN,
    "\xAC" => UTF8::LATIN_CAPITAL_LETTER_O_WITH_HORN,
    "\xAD" => UTF8::LATIN_CAPITAL_LETTER_U_WITH_HORN,
    "\xAE" => UTF8::MODIFIER_LETTER_APOSTROPHE,
    "\xAF" => UTF8::REPLACEMENT_CHARACTER,
    "\xB0" => UTF8::MODIFIER_LETTER_TURNED_COMMA,
    "\xB1" => UTF8::LATIN_SMALL_LETTER_L_WITH_STROKE,
    "\xB2" => UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE,
    "\xB3" => UTF8::LATIN_SMALL_LETTER_D_WITH_STROKE,
    "\xB4" => UTF8::LATIN_SMALL_LETTER_THORN,
    "\xB5" => UTF8::LATIN_SMALL_LETTER_AE,
    "\xB6" => UTF8::LATIN_SMALL_LIGATURE_OE,
    "\xB7" => UTF8::MODIFIER_LETTER_DOUBLE_PRIME,
    "\xB8" => UTF8::LATIN_SMALL_LETTER_DOTLESS_I,
    "\xB9" => UTF8::POUND_SIGN,
    "\xBA" => UTF8::LATIN_SMALL_LETTER_ETH,
    "\xBB" => UTF8::REPLACEMENT_CHARACTER,
    "\xBC" => UTF8::LATIN_SMALL_LETTER_O_WITH_HORN,
    "\xBD" => UTF8::LATIN_SMALL_LETTER_U_WITH_HORN,
    "\xBE" => UTF8::WHITE_SQUARE,
    "\xBF" => UTF8::BLACK_SQUARE,
    "\xC0" => UTF8::DEGREE_SIGN,
    "\xC1" => UTF8::SCRIPT_SMALL_L,
    "\xC2" => UTF8::SOUND_RECORDING_COPYRIGHT,
    "\xC3" => UTF8::COPYRIGHT_SIGN,
    "\xC4" => UTF8::MUSIC_SHARP_SIGN,
    "\xC5" => UTF8::INVERTED_QUESTION_MARK,
    "\xC6" => UTF8::INVERTED_EXCLAMATION_MARK,
    // MARC-21 puts the small Es Zett (U+00DF) here - not the capital sharp S.
    "\xC7" => UTF8::LATIN_SMALL_LETTER_SHARP_S,
    "\xC8" => UTF8::EURO_SIGN,
    "\xC9" => UTF8::REPLACEMENT_CHARACTER,
    "\xCA" => UTF8::REPLACEMENT_CHARACTER,
    "\xCB" => UTF8::REPLACEMENT_CHARACTER,
    "\xCC" => UTF8::REPLACEMENT_CHARACTER,
    "\xCD" => UTF8::REPLACEMENT_CHARACTER,
    "\xCE" => UTF8::REPLACEMENT_CHARACTER,
    // The GEDCOM flavour of Z39.47-1985 puts Es Zett here.
    "\xCF" => UTF8::LATIN_SMALL_LETTER_SHARP_S,
    "\xD0" => UTF8::REPLACEMENT_CHARACTER,
    "\xD1" => UTF8::REPLACEMENT_CHARACTER,
    "\xD2" => UTF8::REPLACEMENT_CHARACTER,
    "\xD3" => UTF8::REPLACEMENT_CHARACTER,
    "\xD4" => UTF8::REPLACEMENT_CHARACTER,
    "\xD5" => UTF8::REPLACEMENT_CHARACTER,
    "\xD6" => UTF8::REPLACEMENT_CHARACTER,
    "\xD7" => UTF8::REPLACEMENT_CHARACTER,
    "\xD8" => UTF8::REPLACEMENT_CHARACTER,
    "\xD9" => UTF8::REPLACEMENT_CHARACTER,
    "\xDA" => UTF8::REPLACEMENT_CHARACTER,
    "\xDB" => UTF8::REPLACEMENT_CHARACTER,
    "\xDC" => UTF8::REPLACEMENT_CHARACTER,
    "\xDD" => UTF8::REPLACEMENT_CHARACTER,
    "\xDE" => UTF8::REPLACEMENT_CHARACTER,
    "\xDF" => UTF8::REPLACEMENT_CHARACTER,

    // 0xE0-0xFF: combining diacritics.
    "\xE0" => UTF8::COMBINING_HOOK_ABOVE,
    "\xE1" => UTF8::COMBINING_GRAVE_ACCENT,
    "\xE2" => UTF8::COMBINING_ACUTE_ACCENT,
    "\xE3" => UTF8::COMBINING_CIRCUMFLEX_ACCENT,
    "\xE4" => UTF8::COMBINING_TILDE,
    "\xE5" => UTF8::COMBINING_MACRON,
    "\xE6" => UTF8::COMBINING_BREVE,
    "\xE7" => UTF8::COMBINING_DOT_ABOVE,
    "\xE8" => UTF8::COMBINING_DIAERESIS,
    "\xE9" => UTF8::COMBINING_CARON,
    "\xEA" => UTF8::COMBINING_RING_ABOVE,
    // First half of the two-part breve: becomes a single double-width mark.
    "\xEB" => UTF8::COMBINING_DOUBLE_INVERTED_BREVE,
    // Second half of the two-part breve: already covered by 0xEB.
    "\xEC" => '',
    "\xED" => UTF8::COMBINING_COMMA_ABOVE_RIGHT,
    "\xEE" => UTF8::COMBINING_DOUBLE_ACUTE_ACCENT,
    "\xEF" => UTF8::COMBINING_CANDRABINDU,
    "\xF0" => UTF8::COMBINING_CEDILLA,
    "\xF1" => UTF8::COMBINING_OGONEK,
    "\xF2" => UTF8::COMBINING_DOT_BELOW,
    "\xF3" => UTF8::COMBINING_DIAERESIS_BELOW,
    "\xF4" => UTF8::COMBINING_RING_BELOW,
    "\xF5" => UTF8::COMBINING_DOUBLE_LOW_LINE,
    "\xF6" => UTF8::COMBINING_LOW_LINE,
    "\xF7" => UTF8::COMBINING_COMMA_BELOW,
    "\xF8" => UTF8::COMBINING_LEFT_HALF_RING_BELOW,
    "\xF9" => UTF8::COMBINING_BREVE_BELOW,
    // First half of the two-part tilde: becomes a single double-width mark.
    "\xFA" => UTF8::COMBINING_DOUBLE_TILDE,
    // Second half of the two-part tilde: already covered by 0xFA.
    "\xFB" => '',
    "\xFC" => UTF8::REPLACEMENT_CHARACTER,
    "\xFD" => UTF8::REPLACEMENT_CHARACTER,
    "\xFE" => UTF8::COMBINING_COMMA_ABOVE,
    "\xFF" => UTF8::COMBINING_LONG_SOLIDUS_OVERLAY,
];
||
| 226 | |||
| 227 | // The subset of pre-composed UTF8 characters that can be made from ANSEL characters. |
||
| 228 | private const array PRECOMPOSED_CHARACTERS = [ |
||
| 229 | 'A' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_ACUTE, |
||
| 230 | 'A' . UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE, |
||
| 231 | 'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_ACUTE, |
||
| 232 | 'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_DOT_BELOW, |
||
| 233 | 'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_GRAVE, |
||
| 234 | 'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE, |
||
| 235 | 'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_TILDE, |
||
| 236 | 'A' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CARON, |
||
| 237 | 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX, |
||
| 238 | 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE, |
||
| 239 | 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW, |
||
| 240 | 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE, |
||
| 241 | 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, |
||
| 242 | 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE, |
||
| 243 | 'A' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS, |
||
| 244 | 'A' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS_AND_MACRON, |
||
| 245 | 'A' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE, |
||
| 246 | 'A' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON, |
||
| 247 | 'A' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_BELOW, |
||
| 248 | 'A' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_GRAVE, |
||
| 249 | 'A' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_HOOK_ABOVE, |
||
| 250 | 'A' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_A_WITH_MACRON, |
||
| 251 | 'A' . UTF8::COMBINING_OGONEK => UTF8::LATIN_CAPITAL_LETTER_A_WITH_OGONEK, |
||
| 252 | 'A' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE, |
||
| 253 | 'A' . UTF8::COMBINING_RING_ABOVE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE, |
||
| 254 | 'A' . UTF8::COMBINING_RING_BELOW => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_BELOW, |
||
| 255 | 'A' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_TILDE, |
||
| 256 | 'B' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_B_WITH_DOT_ABOVE, |
||
| 257 | 'B' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_B_WITH_DOT_BELOW, |
||
| 258 | 'C' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_C_WITH_ACUTE, |
||
| 259 | 'C' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CARON, |
||
| 260 | 'C' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CEDILLA, |
||
| 261 | 'C' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX, |
||
| 262 | 'C' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE, |
||
| 263 | 'C' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CEDILLA_AND_ACUTE, |
||
| 264 | 'D' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_D_WITH_CARON, |
||
| 265 | 'D' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_D_WITH_CEDILLA, |
||
| 266 | 'D' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_D_WITH_DOT_ABOVE, |
||
| 267 | 'D' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_D_WITH_DOT_BELOW, |
||
| 268 | 'E' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_ACUTE, |
||
| 269 | 'E' . UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_BREVE, |
||
| 270 | 'E' . UTF8::COMBINING_BREVE . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CEDILLA_AND_BREVE, |
||
| 271 | 'E' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CARON, |
||
| 272 | 'E' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CEDILLA, |
||
| 273 | 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX, |
||
| 274 | 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE, |
||
| 275 | 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW, |
||
| 276 | 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE, |
||
| 277 | 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, |
||
| 278 | 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE, |
||
| 279 | 'E' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS, |
||
| 280 | 'E' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE, |
||
| 281 | 'E' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DOT_BELOW, |
||
| 282 | 'E' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_GRAVE, |
||
| 283 | 'E' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_HOOK_ABOVE, |
||
| 284 | 'E' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON, |
||
| 285 | 'E' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_ACUTE, |
||
| 286 | 'E' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_GRAVE, |
||
| 287 | 'E' . UTF8::COMBINING_OGONEK => UTF8::LATIN_CAPITAL_LETTER_E_WITH_OGONEK, |
||
| 288 | 'E' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_TILDE, |
||
| 289 | 'F' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_F_WITH_DOT_ABOVE, |
||
| 290 | 'G' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_G_WITH_ACUTE, |
||
| 291 | 'G' . UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_G_WITH_BREVE, |
||
| 292 | 'G' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CARON, |
||
| 293 | 'G' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CEDILLA, |
||
| 294 | 'G' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX, |
||
| 295 | 'G' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE, |
||
| 296 | 'G' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_G_WITH_MACRON, |
||
| 297 | 'H' . UTF8::COMBINING_BREVE_BELOW => UTF8::LATIN_CAPITAL_LETTER_H_WITH_BREVE_BELOW, |
||
| 298 | 'H' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CARON, |
||
| 299 | 'H' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CEDILLA, |
||
| 300 | 'H' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX, |
||
| 301 | 'H' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DIAERESIS, |
||
| 302 | 'H' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DOT_ABOVE, |
||
| 303 | 'H' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DOT_BELOW, |
||
| 304 | 'I' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_I_WITH_ACUTE, |
||
| 305 | 'I' . UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_I_WITH_BREVE, |
||
| 306 | 'I' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_I_WITH_CARON, |
||
| 307 | 'I' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX, |
||
| 308 | 'I' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS, |
||
| 309 | 'I' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS_AND_ACUTE, |
||
| 310 | 'I' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE, |
||
| 311 | 'I' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DOT_BELOW, |
||
| 312 | 'I' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_I_WITH_GRAVE, |
||
| 313 | 'I' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_I_WITH_HOOK_ABOVE, |
||
| 314 | 'I' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_I_WITH_MACRON, |
||
| 315 | 'I' . UTF8::COMBINING_OGONEK => UTF8::LATIN_CAPITAL_LETTER_I_WITH_OGONEK, |
||
| 316 | 'I' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_I_WITH_TILDE, |
||
| 317 | 'J' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX, |
||
| 318 | 'K' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_K_WITH_CARON, |
||
| 319 | 'K' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_K_WITH_CEDILLA, |
||
| 320 | 'K' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_K_WITH_ACUTE, |
||
| 321 | 'K' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_K_WITH_DOT_BELOW, |
||
| 322 | 'L' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_L_WITH_ACUTE, |
||
| 323 | 'L' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_L_WITH_CARON, |
||
| 324 | 'L' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_L_WITH_CEDILLA, |
||
| 325 | 'L' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW, |
||
| 326 | 'L' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW_AND_MACRON, |
||
| 327 | 'M' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_M_WITH_ACUTE, |
||
| 328 | 'M' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_M_WITH_DOT_ABOVE, |
||
| 329 | 'M' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_M_WITH_DOT_BELOW, |
||
| 330 | 'N' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_N_WITH_ACUTE, |
||
| 331 | 'N' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_N_WITH_CARON, |
||
| 332 | 'N' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_N_WITH_CEDILLA, |
||
| 333 | 'N' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_N_WITH_DOT_ABOVE, |
||
| 334 | 'N' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_N_WITH_DOT_BELOW, |
||
| 335 | 'N' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_N_WITH_GRAVE, |
||
| 336 | 'N' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_N_WITH_TILDE, |
||
| 337 | 'O' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_ACUTE, |
||
| 338 | 'O' . UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_BREVE, |
||
| 339 | 'O' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CARON, |
||
| 340 | 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX, |
||
| 341 | 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE, |
||
| 342 | 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW, |
||
| 343 | 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE, |
||
| 344 | 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, |
||
| 345 | 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE, |
||
| 346 | 'O' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS, |
||
| 347 | 'O' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS_AND_MACRON, |
||
| 348 | 'O' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE, |
||
| 349 | 'O' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON, |
||
| 350 | 'O' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_BELOW, |
||
| 351 | 'O' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE, |
||
| 352 | 'O' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_GRAVE, |
||
| 353 | 'O' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_HOOK_ABOVE, |
||
| 354 | 'O' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON, |
||
| 355 | 'O' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_ACUTE, |
||
| 356 | 'O' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_GRAVE, |
||
| 357 | 'O' . UTF8::COMBINING_OGONEK => UTF8::LATIN_CAPITAL_LETTER_O_WITH_OGONEK, |
||
| 358 | 'O' . UTF8::COMBINING_OGONEK . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_OGONEK_AND_MACRON, |
||
| 359 | 'O' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE, |
||
| 360 | 'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_ACUTE, |
||
| 361 | 'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_DIAERESIS, |
||
| 362 | 'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_MACRON, |
||
| 363 | 'P' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_P_WITH_ACUTE, |
||
| 364 | 'P' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_P_WITH_DOT_ABOVE, |
||
| 365 | 'R' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_R_WITH_ACUTE, |
||
| 366 | 'R' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_R_WITH_CARON, |
||
| 367 | 'R' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_R_WITH_CEDILLA, |
||
| 368 | 'R' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_ABOVE, |
||
| 369 | 'R' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW, |
||
| 370 | 'R' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW_AND_MACRON, |
||
| 371 | 'S' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_S_WITH_ACUTE, |
||
| 372 | 'S' . UTF8::COMBINING_ACUTE_ACCENT . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE, |
||
| 373 | 'S' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CARON, |
||
| 374 | 'S' . UTF8::COMBINING_CARON . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CARON_AND_DOT_ABOVE, |
||
| 375 | 'S' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CEDILLA, |
||
| 376 | 'S' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX, |
||
| 377 | 'S' . UTF8::COMBINING_COMMA_BELOW => UTF8::LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW, |
||
| 378 | 'S' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_ABOVE, |
||
| 379 | 'S' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW, |
||
| 380 | 'S' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE, |
||
| 381 | 'T' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_T_WITH_CARON, |
||
| 382 | 'T' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_T_WITH_CEDILLA, |
||
| 383 | 'T' . UTF8::COMBINING_COMMA_BELOW => UTF8::LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW, |
||
| 384 | 'T' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_T_WITH_DOT_ABOVE, |
||
| 385 | 'T' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_T_WITH_DOT_BELOW, |
||
| 386 | 'U' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_ACUTE, |
||
| 387 | 'U' . UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_U_WITH_BREVE, |
||
| 388 | 'U' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_U_WITH_CARON, |
||
| 389 | 'U' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX, |
||
| 390 | 'U' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS, |
||
| 391 | 'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_ACUTE, |
||
| 392 | 'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_CARON, |
||
| 393 | 'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_GRAVE, |
||
| 394 | 'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_MACRON, |
||
| 395 | 'U' . UTF8::COMBINING_DIAERESIS_BELOW => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_BELOW, |
||
| 396 | 'U' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DOT_BELOW, |
||
| 397 | 'U' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE, |
||
| 398 | 'U' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_GRAVE, |
||
| 399 | 'U' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_U_WITH_HOOK_ABOVE, |
||
| 400 | 'U' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_U_WITH_MACRON, |
||
| 401 | 'U' . UTF8::COMBINING_MACRON . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_U_WITH_MACRON_AND_DIAERESIS, |
||
| 402 | 'U' . UTF8::COMBINING_OGONEK => UTF8::LATIN_CAPITAL_LETTER_U_WITH_OGONEK, |
||
| 403 | 'U' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE, |
||
| 404 | 'U' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_U_WITH_TILDE, |
||
| 405 | 'U' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_TILDE_AND_ACUTE, |
||
| 406 | 'V' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_V_WITH_DOT_BELOW, |
||
| 407 | 'V' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_V_WITH_TILDE, |
||
| 408 | 'W' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_W_WITH_ACUTE, |
||
| 409 | 'W' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX, |
||
| 410 | 'W' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DIAERESIS, |
||
| 411 | 'W' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DOT_ABOVE, |
||
| 412 | 'W' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DOT_BELOW, |
||
| 413 | 'W' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_W_WITH_GRAVE, |
||
| 414 | 'X' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_X_WITH_DIAERESIS, |
||
| 415 | 'X' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_X_WITH_DOT_ABOVE, |
||
| 416 | 'Y' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_ACUTE, |
||
| 417 | 'Y' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX, |
||
| 418 | 'Y' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS, |
||
| 419 | 'Y' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DOT_ABOVE, |
||
| 420 | 'Y' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DOT_BELOW, |
||
| 421 | 'Y' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_GRAVE, |
||
| 422 | 'Y' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_HOOK_ABOVE, |
||
| 423 | 'Y' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_MACRON, |
||
| 424 | 'Y' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_TILDE, |
||
| 425 | 'Z' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_ACUTE, |
||
| 426 | 'Z' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_CARON, |
||
| 427 | 'Z' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_CIRCUMFLEX, |
||
| 428 | 'Z' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE, |
||
| 429 | 'Z' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_DOT_BELOW, |
||
| 430 | 'a' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_ACUTE, |
||
| 431 | 'a' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE, |
||
| 432 | 'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_ACUTE, |
||
| 433 | 'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_DOT_BELOW, |
||
| 434 | 'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_GRAVE, |
||
| 435 | 'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE, |
||
| 436 | 'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_TILDE, |
||
| 437 | 'a' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_A_WITH_CARON, |
||
| 438 | 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX, |
||
| 439 | 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE, |
||
| 440 | 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW, |
||
| 441 | 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE, |
||
| 442 | 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, |
||
| 443 | 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE, |
||
| 444 | 'a' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_A_WITH_DIAERESIS, |
||
| 445 | 'a' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_A_WITH_DIAERESIS_AND_MACRON, |
||
| 446 | 'a' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE, |
||
| 447 | 'a' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON, |
||
| 448 | 'a' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_BELOW, |
||
| 449 | 'a' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_GRAVE, |
||
| 450 | 'a' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_A_WITH_HOOK_ABOVE, |
||
| 451 | 'a' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_A_WITH_MACRON, |
||
| 452 | 'a' . UTF8::COMBINING_OGONEK => UTF8::LATIN_SMALL_LETTER_A_WITH_OGONEK, |
||
| 453 | 'a' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_ABOVE, |
||
| 454 | 'a' . UTF8::COMBINING_RING_ABOVE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE, |
||
| 455 | 'a' . UTF8::COMBINING_RING_BELOW => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_BELOW, |
||
| 456 | 'a' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_A_WITH_TILDE, |
||
| 457 | 'b' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_B_WITH_DOT_ABOVE, |
||
| 458 | 'b' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_B_WITH_DOT_BELOW, |
||
| 459 | 'c' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_C_WITH_ACUTE, |
||
| 460 | 'c' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_C_WITH_CARON, |
||
| 461 | 'c' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_C_WITH_CEDILLA, |
||
| 462 | 'c' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX, |
||
| 463 | 'c' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE, |
||
| 464 | 'c' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_C_WITH_CEDILLA_AND_ACUTE, |
||
| 465 | 'd' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_D_WITH_CARON, |
||
| 466 | 'd' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_D_WITH_CEDILLA, |
||
| 467 | 'd' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_D_WITH_DOT_ABOVE, |
||
| 468 | 'd' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_D_WITH_DOT_BELOW, |
||
| 469 | 'e' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_ACUTE, |
||
| 470 | 'e' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_E_WITH_BREVE, |
||
| 471 | 'e' . UTF8::COMBINING_BREVE . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_E_WITH_CEDILLA_AND_BREVE, |
||
| 472 | 'e' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_E_WITH_CARON, |
||
| 473 | 'e' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_E_WITH_CEDILLA, |
||
| 474 | 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX, |
||
| 475 | 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE, |
||
| 476 | 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW, |
||
| 477 | 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE, |
||
| 478 | 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, |
||
| 479 | 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE, |
||
| 480 | 'e' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_E_WITH_DIAERESIS, |
||
| 481 | 'e' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE, |
||
| 482 | 'e' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_E_WITH_DOT_BELOW, |
||
| 483 | 'e' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_GRAVE, |
||
| 484 | 'e' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_E_WITH_HOOK_ABOVE, |
||
| 485 | 'e' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON, |
||
| 486 | 'e' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON_AND_ACUTE, |
||
| 487 | 'e' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON_AND_GRAVE, |
||
| 488 | 'e' . UTF8::COMBINING_OGONEK => UTF8::LATIN_SMALL_LETTER_E_WITH_OGONEK, |
||
| 489 | 'e' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_E_WITH_TILDE, |
||
| 490 | 'f' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_F_WITH_DOT_ABOVE, |
||
| 491 | 'g' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_G_WITH_ACUTE, |
||
| 492 | 'g' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_G_WITH_BREVE, |
||
| 493 | 'g' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_G_WITH_CARON, |
||
| 494 | 'g' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_G_WITH_CEDILLA, |
||
| 495 | 'g' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX, |
||
| 496 | 'g' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE, |
||
| 497 | 'g' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_G_WITH_MACRON, |
||
| 498 | 'h' . UTF8::COMBINING_BREVE_BELOW => UTF8::LATIN_SMALL_LETTER_H_WITH_BREVE_BELOW, |
||
| 499 | 'h' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_H_WITH_CARON, |
||
| 500 | 'h' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_H_WITH_CEDILLA, |
||
| 501 | 'h' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX, |
||
| 502 | 'h' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_H_WITH_DIAERESIS, |
||
| 503 | 'h' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_H_WITH_DOT_ABOVE, |
||
| 504 | 'h' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_H_WITH_DOT_BELOW, |
||
| 505 | 'i' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_I_WITH_ACUTE, |
||
| 506 | 'i' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_I_WITH_BREVE, |
||
| 507 | 'i' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_I_WITH_CARON, |
||
| 508 | 'i' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX, |
||
| 509 | 'i' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_I_WITH_DIAERESIS, |
||
| 510 | 'i' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_I_WITH_DIAERESIS_AND_ACUTE, |
||
| 511 | 'i' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_I_WITH_DOT_BELOW, |
||
| 512 | 'i' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_I_WITH_GRAVE, |
||
| 513 | 'i' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_I_WITH_HOOK_ABOVE, |
||
| 514 | 'i' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_I_WITH_MACRON, |
||
| 515 | 'i' . UTF8::COMBINING_OGONEK => UTF8::LATIN_SMALL_LETTER_I_WITH_OGONEK, |
||
| 516 | 'i' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_I_WITH_TILDE, |
||
| 517 | 'j' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_J_WITH_CARON, |
||
| 518 | 'j' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX, |
||
| 519 | 'k' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_K_WITH_CARON, |
||
| 520 | 'k' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_K_WITH_CEDILLA, |
||
| 521 | 'k' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_K_WITH_ACUTE, |
||
| 522 | 'k' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_K_WITH_DOT_BELOW, |
||
| 523 | 'l' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_L_WITH_ACUTE, |
||
| 524 | 'l' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_L_WITH_CARON, |
||
| 525 | 'l' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_L_WITH_CEDILLA, |
||
| 526 | 'l' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_L_WITH_DOT_BELOW, |
||
| 527 | 'l' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_L_WITH_DOT_BELOW_AND_MACRON, |
||
| 528 | 'm' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_M_WITH_ACUTE, |
||
| 529 | 'm' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_M_WITH_DOT_ABOVE, |
||
| 530 | 'm' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_M_WITH_DOT_BELOW, |
||
| 531 | 'n' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_N_WITH_ACUTE, |
||
| 532 | 'n' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_N_WITH_CARON, |
||
| 533 | 'n' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_N_WITH_CEDILLA, |
||
| 534 | 'n' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_N_WITH_DOT_ABOVE, |
||
| 535 | 'n' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_N_WITH_DOT_BELOW, |
||
| 536 | 'n' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_N_WITH_GRAVE, |
||
| 537 | 'n' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_N_WITH_TILDE, |
||
| 538 | 'o' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_ACUTE, |
||
| 539 | 'o' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_O_WITH_BREVE, |
||
| 540 | 'o' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_O_WITH_CARON, |
||
| 541 | 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX, |
||
| 542 | 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE, |
||
| 543 | 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW, |
||
| 544 | 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE, |
||
| 545 | 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, |
||
| 546 | 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE, |
||
| 547 | 'o' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_O_WITH_DIAERESIS, |
||
| 548 | 'o' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_O_WITH_DIAERESIS_AND_MACRON, |
||
| 549 | 'o' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE, |
||
| 550 | 'o' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON, |
||
| 551 | 'o' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_BELOW, |
||
| 552 | 'o' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE, |
||
| 553 | 'o' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_GRAVE, |
||
| 554 | 'o' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_O_WITH_HOOK_ABOVE, |
||
| 555 | 'o' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON, |
||
| 556 | 'o' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON_AND_ACUTE, |
||
| 557 | 'o' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON_AND_GRAVE, |
||
| 558 | 'o' . UTF8::COMBINING_OGONEK => UTF8::LATIN_SMALL_LETTER_O_WITH_OGONEK, |
||
| 559 | 'o' . UTF8::COMBINING_OGONEK . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_O_WITH_OGONEK_AND_MACRON, |
||
| 560 | 'o' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE, |
||
| 561 | 'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_ACUTE, |
||
| 562 | 'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_DIAERESIS, |
||
| 563 | 'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_MACRON, |
||
| 564 | 'p' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_P_WITH_ACUTE, |
||
| 565 | 'p' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_P_WITH_DOT_ABOVE, |
||
| 566 | 'r' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_R_WITH_ACUTE, |
||
| 567 | 'r' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_R_WITH_CARON, |
||
| 568 | 'r' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_R_WITH_CEDILLA, |
||
| 569 | 'r' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_ABOVE, |
||
| 570 | 'r' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_BELOW, |
||
| 571 | 'r' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_BELOW_AND_MACRON, |
||
| 572 | 's' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_S_WITH_ACUTE, |
||
| 573 | 's' . UTF8::COMBINING_ACUTE_ACCENT . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE, |
||
| 574 | 's' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_S_WITH_CARON, |
||
| 575 | 's' . UTF8::COMBINING_CARON . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_S_WITH_CARON_AND_DOT_ABOVE, |
||
| 576 | 's' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_S_WITH_CEDILLA, |
||
| 577 | 's' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX, |
||
| 578 | 's' . UTF8::COMBINING_COMMA_BELOW => UTF8::LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW, |
||
| 579 | 's' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_ABOVE, |
||
| 580 | 's' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_BELOW, |
||
| 581 | 's' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE, |
||
| 582 | 't' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_T_WITH_CARON, |
||
| 583 | 't' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_T_WITH_CEDILLA, |
||
| 584 | 't' . UTF8::COMBINING_COMMA_BELOW => UTF8::LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW, |
||
| 585 | 't' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_T_WITH_DIAERESIS, |
||
| 586 | 't' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_T_WITH_DOT_ABOVE, |
||
| 587 | 't' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_T_WITH_DOT_BELOW, |
||
| 588 | 'u' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_ACUTE, |
||
| 589 | 'u' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_U_WITH_BREVE, |
||
| 590 | 'u' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_U_WITH_CARON, |
||
| 591 | 'u' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX, |
||
| 592 | 'u' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS, |
||
| 593 | 'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_ACUTE, |
||
| 594 | 'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_CARON, |
||
| 595 | 'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_GRAVE, |
||
| 596 | 'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_MACRON, |
||
| 597 | 'u' . UTF8::COMBINING_DIAERESIS_BELOW => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_BELOW, |
||
| 598 | 'u' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_U_WITH_DOT_BELOW, |
||
| 599 | 'u' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE, |
||
| 600 | 'u' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_GRAVE, |
||
| 601 | 'u' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_U_WITH_HOOK_ABOVE, |
||
| 602 | 'u' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_U_WITH_MACRON, |
||
| 603 | 'u' . UTF8::COMBINING_MACRON . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_U_WITH_MACRON_AND_DIAERESIS, |
||
| 604 | 'u' . UTF8::COMBINING_OGONEK => UTF8::LATIN_SMALL_LETTER_U_WITH_OGONEK, |
||
| 605 | 'u' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_SMALL_LETTER_U_WITH_RING_ABOVE, |
||
| 606 | 'u' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_U_WITH_TILDE, |
||
| 607 | 'u' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_TILDE_AND_ACUTE, |
||
| 608 | 'v' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_V_WITH_DOT_BELOW, |
||
| 609 | 'v' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_V_WITH_TILDE, |
||
| 610 | 'w' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_W_WITH_ACUTE, |
||
| 611 | 'w' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX, |
||
| 612 | 'w' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_W_WITH_DIAERESIS, |
||
| 613 | 'w' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_W_WITH_DOT_ABOVE, |
||
| 614 | 'w' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_W_WITH_DOT_BELOW, |
||
| 615 | 'w' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_W_WITH_GRAVE, |
||
| 616 | 'w' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_SMALL_LETTER_W_WITH_RING_ABOVE, |
||
| 617 | 'x' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_X_WITH_DIAERESIS, |
||
| 618 | 'x' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_X_WITH_DOT_ABOVE, |
||
| 619 | 'y' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_Y_WITH_ACUTE, |
||
| 620 | 'y' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX, |
||
| 621 | 'y' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, |
||
| 622 | 'y' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_Y_WITH_DOT_ABOVE, |
||
| 623 | 'y' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_Y_WITH_DOT_BELOW, |
||
| 624 | 'y' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_Y_WITH_GRAVE, |
||
| 625 | 'y' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_Y_WITH_HOOK_ABOVE, |
||
| 626 | 'y' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_Y_WITH_MACRON, |
||
| 627 | 'y' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_SMALL_LETTER_Y_WITH_RING_ABOVE, |
||
| 628 | 'y' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_Y_WITH_TILDE, |
||
| 629 | 'z' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_Z_WITH_ACUTE, |
||
| 630 | 'z' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_Z_WITH_CARON, |
||
| 631 | 'z' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_Z_WITH_CIRCUMFLEX, |
||
| 632 | 'z' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE, |
||
| 633 | 'z' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_Z_WITH_DOT_BELOW, |
||
| 634 | UTF8::LATIN_CAPITAL_LETTER_AE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_AE_WITH_ACUTE, |
||
| 635 | UTF8::LATIN_CAPITAL_LETTER_AE . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_AE_WITH_MACRON, |
||
| 636 | UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE_AND_ACUTE, |
||
| 637 | UTF8::LATIN_SMALL_LETTER_AE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_AE_WITH_ACUTE, |
||
| 638 | UTF8::LATIN_SMALL_LETTER_AE . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_AE_WITH_MACRON, |
||
| 639 | UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE_AND_ACUTE, |
||
| 640 | ]; |
||
| 641 | |||
/**
 * Two-step translation of letters with a horn, used when converting from UTF-8.
 *
 * ANSEL supports O and U with a horn diacritic, but not the combining diacritic.
 * The letter + combining horn pair is therefore swapped for a NUL-delimited
 * placeholder (step 1) before the generic conversion runs, and the placeholder
 * is swapped for the final single byte (step 2) afterwards.
 *
 * The values of step 1 must be exactly the keys of step 2.
 */
private const array HORN_CONVERT_STEP_1 = [
    'O' . UTF8::COMBINING_HORN => "\x00O_WITH_HORN\x00",
    'U' . UTF8::COMBINING_HORN => "\x00U_WITH_HORN\x00",
    'o' . UTF8::COMBINING_HORN => "\x00o_WITH_HORN\x00",
    'u' . UTF8::COMBINING_HORN => "\x00u_WITH_HORN\x00",
];

private const array HORN_CONVERT_STEP_2 = [
    "\x00O_WITH_HORN\x00" => "\xAC", // capital O with horn
    "\x00U_WITH_HORN\x00" => "\xAD", // capital U with horn
    "\x00o_WITH_HORN\x00" => "\xBC", // small o with horn
    "\x00u_WITH_HORN\x00" => "\xBD", // small u with horn
];
||
| 655 | |||
/**
 * Convert a string from ANSEL to UTF-8.
 *
 * The conversion has three stages: move diacritic bytes behind the byte they
 * modify, substitute every ANSEL byte using the TO_UTF8 table, then replace
 * letter + combining-diacritic sequences with pre-composed characters.
 *
 * @param string $text
 *
 * @return string
 *
 * @throws \RuntimeException If the PCRE engine fails while re-ordering diacritics.
 */
public function toUtf8(string $text): string
{
    // ANSEL diacritics are prefixes. UTF-8 diacritics are suffixes.
    // Each run of bytes E0-FF is moved behind the single byte that follows it.
    // (Without the "s" modifier, "." never matches a newline, so a run that
    // directly precedes a line break is left where it is.)
    $reordered = preg_replace('/([\xE0-\xFF]+)(.)/', '$2$1', $text);

    // preg_replace() returns null on a PCRE failure. Report it explicitly,
    // rather than letting strtr() raise an unrelated TypeError.
    if ($reordered === null) {
        throw new \RuntimeException('Unable to convert ANSEL to UTF-8: ' . preg_last_error_msg());
    }

    // Simple substitution creates denormalized UTF-8.
    $denormalized = strtr($reordered, self::TO_UTF8);

    // Convert combining diacritics into pre-composed characters.
    return strtr($denormalized, self::PRECOMPOSED_CHARACTERS);
}
||
| 674 | |||
/**
 * Convert a string from UTF-8 to ANSEL.
 *
 * Pre-composed characters are first split into letter + combining diacritic,
 * the parent implementation then converts the individual characters, and
 * finally the diacritic bytes are moved in front of the byte they modify.
 * Letters with a horn are protected by placeholders around the parent call.
 *
 * @param string $text
 *
 * @return string
 *
 * @throws \RuntimeException If the PCRE engine fails while re-ordering diacritics.
 */
public function fromUtf8(string $text): string
{
    // The reverse lookup is derived from a constant, so build it only once
    // instead of flipping the whole table on every call.
    static $decompose = null;
    $decompose ??= array_flip(self::PRECOMPOSED_CHARACTERS);

    // Convert pre-composed characters into combining diacritics.
    $text = strtr($text, $decompose);

    // ANSEL supports letters with horns, but not the combining horn.
    // Hide letter + combining horn behind a placeholder for now.
    $text = strtr($text, self::HORN_CONVERT_STEP_1);

    // Convert characters and combining diacritics separately.
    $text = parent::fromUtf8($text);

    // Replace the placeholders with the ANSEL bytes for letters with horns.
    $text = strtr($text, self::HORN_CONVERT_STEP_2);

    // ANSEL diacritics are prefixes. UTF-8 diacritics are suffixes.
    // Each run of bytes E0-FF is moved in front of the single byte before it.
    $reordered = preg_replace('/([^\xE0-\xFF])([\xE0-\xFF]+)/', '$2$1', $text);

    // preg_replace() returns null on a PCRE failure. Report it explicitly,
    // rather than violating the declared string return type.
    if ($reordered === null) {
        throw new \RuntimeException('Unable to convert UTF-8 to ANSEL: ' . preg_last_error_msg());
    }

    return $reordered;
}
||
| 701 | } |
||
| 702 |