Passed
Push — dev ( 824cd4...d410ef )
by Greg
12:51
created

ANSEL::fromUtf8()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 18
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 1
eloc 6
c 1
b 0
f 1
nc 1
nop 1
dl 0
loc 18
rs 10
1
<?php
2
3
/**
4
 * webtrees: online genealogy
5
 * Copyright (C) 2021 webtrees development team
6
 * This program is free software: you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation, either version 3 of the License, or
9
 * (at your option) any later version.
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU General Public License for more details.
14
 * You should have received a copy of the GNU General Public License
15
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
 */
17
18
declare(strict_types=1);
19
20
namespace Fisharebest\Webtrees\Encodings;
21
22
use function preg_replace;
23
use function strtr;
24
25
/**
26
 * Convert between UTF-8 and ANSEL encoding.
27
 *
28
 * ANSEL is the common name for the MARC-21 encoding, also known as Z39.47, which
29
 * has a number of editions.  These are denoted by a year suffix.
30
 *
31
 * The GEDCOM 5.5.1 specification (1999-10-02) specifies the Z39.47-1985 edition.
32
 * It adds Es Zett (ß) at CF.
33
 *
34
 * According to wikipedia, other non-standard characters are also added.
35
 *
36
 * HEX Unicode Glyph Description
37
 * BE  25A1    □     Empty box
38
 * BF  25A0    ■     Black box
39
 * CD  0065    e     Midline e
40
 * CE  006F    o     Midline o
41
 * CF  00DF    ß     Es Zett
42
 * FC  0338    /     Combining slash
43
 *
44
 * @link https://en.wikipedia.org/wiki/ANSEL
45
 *
46
 * The MARC-21 specification has added a number of additional characters since
47
 * the 1985 edition.
48
 *
49
 * HEX Unicode Glyph Description
50
 * 88  0098          Start of string
51
 * 89  009C          String terminator
52
 * 8D  200D          Zero width joiner
53
 * 8E  200C          Zero width non-joiner
54
 * A7  CAB9       ʹ     Single prime
55
 * AC  C6A0    Ơ     LATIN CAPITAL LETTER O WITH HORN
56
 * AD  C6AF    Ư     LATIN CAPITAL LETTER U WITH HORN
57
 * B7  CABA    ʺ     Double prime
58
 * BC  C6A1    ơ     LATIN SMALL LETTER O WITH HORN
59
 * BD  C6B0    ư     LATIN SMALL LETTER U WITH HORN
60
 * C0  C2B0    °     Degree sign
61
 * C1  E28493  ℓ     Script small L
62
 * C2  E28497  ℗     Sound recording copyright
63
 * C4  E299AF  ♯     Music sharp sign
64
 * C7  00DF    ß     Es Zett
65
 * C8  20AC    €     Euro sign
66
 * E0  0309          Hook above
67
 * EB  0361          Breve (first part / double)
68
 * EC  0361          Breve (second part)
69
 * EF  0310          Candrabindu
70
 * F2  0323          Low dot
71
 * F3  0324          Diaeresis below
72
 * F4  0325          Ring below
73
 * F5  0333          Double underline
74
 * F6  0332          Underline
75
 * F8  031C          Left half ring below (right cedilla)
76
 * F9  032E          Breve below
77
 * FA  0360          Double tilde (first part / double).
78
 * FB  0360          Double tilde (second part).
79
 * FF  0338          Slash
80
 *
81
 * @link https://memory.loc.gov/diglib/codetables/45.html
82
 *
83
 * Note that this means we can expect two different representations of Es Zett.
84
 *
85
 * There are two multi-part diacritics.  There are two ways to represent these.
86
 *
87
 * ANSEL       | UTF-8         | UTF-8 (preferred)
88
 * ------------+---------------+-----------------
89
 * FA x FB y   | x FE22 y FE23 | x 0360 y
90
 * EB x EC y   | x FE20 y FE21 | x 0361 y
91
 */
92
class ANSEL extends AbstractEncoding
93
{
94
    public const NAME = 'ANSEL';
95
96
    /**
     * Lookup table from each single ANSEL byte in the range 0x80-0xFF to its
     * UTF-8 replacement string.
     *
     * - Bytes with no assigned character map to UTF8::REPLACEMENT_CHARACTER.
     * - 0xEC and 0xFB (the second halves of the two-part double breve and
     *   double tilde) map to an empty string, so that only the single
     *   "double" combining mark emitted for 0xEB / 0xFA remains.  This is the
     *   preferred form described in the class documentation.
     * - 0xC7 and 0xCF are both Es Zett (U+00DF, small sharp s): the class
     *   documentation lists CF as the GEDCOM addition and C7 as the MARC-21
     *   code point.
     *
     * NOTE(review): presumably consumed by AbstractEncoding (e.g. via strtr());
     * the consumer is not visible in this part of the file.
     */
    protected const TO_UTF8 = [
        "\x80" => UTF8::REPLACEMENT_CHARACTER,
        "\x81" => UTF8::REPLACEMENT_CHARACTER,
        "\x82" => UTF8::REPLACEMENT_CHARACTER,
        "\x83" => UTF8::REPLACEMENT_CHARACTER,
        "\x84" => UTF8::REPLACEMENT_CHARACTER,
        "\x85" => UTF8::REPLACEMENT_CHARACTER,
        "\x86" => UTF8::REPLACEMENT_CHARACTER,
        "\x87" => UTF8::REPLACEMENT_CHARACTER,
        "\x88" => UTF8::START_OF_STRING,
        "\x89" => UTF8::STRING_TERMINATOR,
        "\x8A" => UTF8::REPLACEMENT_CHARACTER,
        "\x8B" => UTF8::REPLACEMENT_CHARACTER,
        "\x8C" => UTF8::REPLACEMENT_CHARACTER,
        "\x8D" => UTF8::ZERO_WIDTH_JOINER,
        "\x8E" => UTF8::ZERO_WIDTH_NON_JOINER,
        "\x8F" => UTF8::REPLACEMENT_CHARACTER,
        "\x90" => UTF8::REPLACEMENT_CHARACTER,
        "\x91" => UTF8::REPLACEMENT_CHARACTER,
        "\x92" => UTF8::REPLACEMENT_CHARACTER,
        "\x93" => UTF8::REPLACEMENT_CHARACTER,
        "\x94" => UTF8::REPLACEMENT_CHARACTER,
        "\x95" => UTF8::REPLACEMENT_CHARACTER,
        "\x96" => UTF8::REPLACEMENT_CHARACTER,
        "\x97" => UTF8::REPLACEMENT_CHARACTER,
        "\x98" => UTF8::REPLACEMENT_CHARACTER,
        "\x99" => UTF8::REPLACEMENT_CHARACTER,
        "\x9A" => UTF8::REPLACEMENT_CHARACTER,
        "\x9B" => UTF8::REPLACEMENT_CHARACTER,
        "\x9C" => UTF8::REPLACEMENT_CHARACTER,
        "\x9D" => UTF8::REPLACEMENT_CHARACTER,
        "\x9E" => UTF8::REPLACEMENT_CHARACTER,
        "\x9F" => UTF8::REPLACEMENT_CHARACTER,
        "\xA0" => UTF8::REPLACEMENT_CHARACTER,
        "\xA1" => UTF8::LATIN_CAPITAL_LETTER_L_WITH_STROKE,
        "\xA2" => UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE,
        "\xA3" => UTF8::LATIN_CAPITAL_LETTER_D_WITH_STROKE,
        "\xA4" => UTF8::LATIN_CAPITAL_LETTER_THORN,
        "\xA5" => UTF8::LATIN_CAPITAL_LETTER_AE,
        "\xA6" => UTF8::LATIN_CAPITAL_LIGATURE_OE,
        "\xA7" => UTF8::MODIFIER_LETTER_PRIME,
        "\xA8" => UTF8::MIDDLE_DOT,
        "\xA9" => UTF8::MUSIC_FLAT_SIGN,
        "\xAA" => UTF8::REGISTERED_SIGN,
        "\xAB" => UTF8::PLUS_MINUS_SIGN,
        "\xAC" => UTF8::LATIN_CAPITAL_LETTER_O_WITH_HORN,
        "\xAD" => UTF8::LATIN_CAPITAL_LETTER_U_WITH_HORN,
        "\xAE" => UTF8::MODIFIER_LETTER_APOSTROPHE,
        "\xAF" => UTF8::REPLACEMENT_CHARACTER,
        "\xB0" => UTF8::MODIFIER_LETTER_TURNED_COMMA,
        "\xB1" => UTF8::LATIN_SMALL_LETTER_L_WITH_STROKE,
        "\xB2" => UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE,
        "\xB3" => UTF8::LATIN_SMALL_LETTER_D_WITH_STROKE,
        "\xB4" => UTF8::LATIN_SMALL_LETTER_THORN,
        "\xB5" => UTF8::LATIN_SMALL_LETTER_AE,
        "\xB6" => UTF8::LATIN_SMALL_LIGATURE_OE,
        "\xB7" => UTF8::MODIFIER_LETTER_DOUBLE_PRIME,
        "\xB8" => UTF8::LATIN_SMALL_LETTER_DOTLESS_I,
        "\xB9" => UTF8::POUND_SIGN,
        "\xBA" => UTF8::LATIN_SMALL_LETTER_ETH,
        "\xBB" => UTF8::REPLACEMENT_CHARACTER,
        "\xBC" => UTF8::LATIN_SMALL_LETTER_O_WITH_HORN,
        "\xBD" => UTF8::LATIN_SMALL_LETTER_U_WITH_HORN,
        "\xBE" => UTF8::WHITE_SQUARE,
        "\xBF" => UTF8::BLACK_SQUARE,
        "\xC0" => UTF8::DEGREE_SIGN,
        "\xC1" => UTF8::SCRIPT_SMALL_L,
        "\xC2" => UTF8::SOUND_RECORDING_COPYRIGHT,
        "\xC3" => UTF8::COPYRIGHT_SIGN,
        "\xC4" => UTF8::MUSIC_SHARP_SIGN,
        "\xC5" => UTF8::INVERTED_QUESTION_MARK,
        "\xC6" => UTF8::INVERTED_EXCLAMATION_MARK,
        // MARC-21 Es Zett is U+00DF (small sharp s), the same target as 0xCF.
        "\xC7" => UTF8::LATIN_SMALL_LETTER_SHARP_S,
        "\xC8" => UTF8::EURO_SIGN,
        "\xC9" => UTF8::REPLACEMENT_CHARACTER,
        "\xCA" => UTF8::REPLACEMENT_CHARACTER,
        "\xCB" => UTF8::REPLACEMENT_CHARACTER,
        "\xCC" => UTF8::REPLACEMENT_CHARACTER,
        "\xCD" => UTF8::REPLACEMENT_CHARACTER,
        "\xCE" => UTF8::REPLACEMENT_CHARACTER,
        "\xCF" => UTF8::LATIN_SMALL_LETTER_SHARP_S,
        "\xD0" => UTF8::REPLACEMENT_CHARACTER,
        "\xD1" => UTF8::REPLACEMENT_CHARACTER,
        "\xD2" => UTF8::REPLACEMENT_CHARACTER,
        "\xD3" => UTF8::REPLACEMENT_CHARACTER,
        "\xD4" => UTF8::REPLACEMENT_CHARACTER,
        "\xD5" => UTF8::REPLACEMENT_CHARACTER,
        "\xD6" => UTF8::REPLACEMENT_CHARACTER,
        "\xD7" => UTF8::REPLACEMENT_CHARACTER,
        "\xD8" => UTF8::REPLACEMENT_CHARACTER,
        "\xD9" => UTF8::REPLACEMENT_CHARACTER,
        "\xDA" => UTF8::REPLACEMENT_CHARACTER,
        "\xDB" => UTF8::REPLACEMENT_CHARACTER,
        "\xDC" => UTF8::REPLACEMENT_CHARACTER,
        "\xDD" => UTF8::REPLACEMENT_CHARACTER,
        "\xDE" => UTF8::REPLACEMENT_CHARACTER,
        "\xDF" => UTF8::REPLACEMENT_CHARACTER,
        "\xE0" => UTF8::COMBINING_HOOK_ABOVE,
        "\xE1" => UTF8::COMBINING_GRAVE_ACCENT,
        "\xE2" => UTF8::COMBINING_ACUTE_ACCENT,
        "\xE3" => UTF8::COMBINING_CIRCUMFLEX_ACCENT,
        "\xE4" => UTF8::COMBINING_TILDE,
        "\xE5" => UTF8::COMBINING_MACRON,
        "\xE6" => UTF8::COMBINING_BREVE,
        "\xE7" => UTF8::COMBINING_DOT_ABOVE,
        "\xE8" => UTF8::COMBINING_DIAERESIS,
        "\xE9" => UTF8::COMBINING_CARON,
        "\xEA" => UTF8::COMBINING_RING_ABOVE,
        "\xEB" => UTF8::COMBINING_DOUBLE_INVERTED_BREVE,
        // Second half of the double breve: dropped, 0xEB already emits the double mark.
        "\xEC" => '',
        "\xED" => UTF8::COMBINING_COMMA_ABOVE_RIGHT,
        "\xEE" => UTF8::COMBINING_DOUBLE_ACUTE_ACCENT,
        "\xEF" => UTF8::COMBINING_CANDRABINDU,
        "\xF0" => UTF8::COMBINING_CEDILLA,
        "\xF1" => UTF8::COMBINING_OGONEK,
        "\xF2" => UTF8::COMBINING_DOT_BELOW,
        "\xF3" => UTF8::COMBINING_DIAERESIS_BELOW,
        "\xF4" => UTF8::COMBINING_RING_BELOW,
        "\xF5" => UTF8::COMBINING_DOUBLE_LOW_LINE,
        "\xF6" => UTF8::COMBINING_LOW_LINE,
        "\xF7" => UTF8::COMBINING_COMMA_BELOW,
        "\xF8" => UTF8::COMBINING_LEFT_HALF_RING_BELOW,
        "\xF9" => UTF8::COMBINING_BREVE_BELOW,
        "\xFA" => UTF8::COMBINING_DOUBLE_TILDE,
        // Second half of the double tilde: dropped, 0xFA already emits the double mark.
        "\xFB" => '',
        "\xFC" => UTF8::REPLACEMENT_CHARACTER,
        "\xFD" => UTF8::REPLACEMENT_CHARACTER,
        "\xFE" => UTF8::COMBINING_COMMA_ABOVE,
        "\xFF" => UTF8::COMBINING_LONG_SOLIDUS_OVERLAY,
    ];
226
227
    // The subset of pre-composed UTF8 characters that can be made from ANSEL characters.
228
    private const PRECOMPOSED_CHARACTERS = [
229
        'A' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_A_WITH_ACUTE,
230
        'A' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE,
231
        'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_ACUTE,
232
        'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_DOT_BELOW                 => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_DOT_BELOW,
233
        'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_GRAVE_ACCENT              => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_GRAVE,
234
        'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_HOOK_ABOVE                => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE,
235
        'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_TILDE                     => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_TILDE,
236
        'A' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CARON,
237
        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX,
238
        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE,
239
        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW,
240
        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE,
241
        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
242
        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE,
243
        'A' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS,
244
        'A' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS_AND_MACRON,
245
        'A' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE,
246
        'A' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON,
247
        'A' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_BELOW,
248
        'A' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_A_WITH_GRAVE,
249
        'A' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_A_WITH_HOOK_ABOVE,
250
        'A' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_A_WITH_MACRON,
251
        'A' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_CAPITAL_LETTER_A_WITH_OGONEK,
252
        'A' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE,
253
        'A' . UTF8::COMBINING_RING_ABOVE . UTF8::COMBINING_ACUTE_ACCENT         => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE,
254
        'A' . UTF8::COMBINING_RING_BELOW                                        => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_BELOW,
255
        'A' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_A_WITH_TILDE,
256
        'B' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_B_WITH_DOT_ABOVE,
257
        'B' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_B_WITH_DOT_BELOW,
258
        'C' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_C_WITH_ACUTE,
259
        'C' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CARON,
260
        'C' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CEDILLA,
261
        'C' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX,
262
        'C' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE,
263
        'C' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CEDILLA_AND_ACUTE,
264
        'D' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_D_WITH_CARON,
265
        'D' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_D_WITH_CEDILLA,
266
        'D' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_D_WITH_DOT_ABOVE,
267
        'D' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_D_WITH_DOT_BELOW,
268
        'E' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_E_WITH_ACUTE,
269
        'E' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_E_WITH_BREVE,
270
        'E' . UTF8::COMBINING_BREVE . UTF8::COMBINING_CEDILLA                   => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CEDILLA_AND_BREVE,
271
        'E' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CARON,
272
        'E' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CEDILLA,
273
        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX,
274
        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE,
275
        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW,
276
        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE,
277
        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
278
        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE,
279
        'E' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS,
280
        'E' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE,
281
        'E' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DOT_BELOW,
282
        'E' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_E_WITH_GRAVE,
283
        'E' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_E_WITH_HOOK_ABOVE,
284
        'E' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON,
285
        'E' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT             => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_ACUTE,
286
        'E' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT             => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_GRAVE,
287
        'E' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_CAPITAL_LETTER_E_WITH_OGONEK,
288
        'E' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_E_WITH_TILDE,
289
        'F' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_F_WITH_DOT_ABOVE,
290
        'G' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_G_WITH_ACUTE,
291
        'G' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_G_WITH_BREVE,
292
        'G' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CARON,
293
        'G' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CEDILLA,
294
        'G' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX,
295
        'G' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE,
296
        'G' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_G_WITH_MACRON,
297
        'H' . UTF8::COMBINING_BREVE_BELOW                                       => UTF8::LATIN_CAPITAL_LETTER_H_WITH_BREVE_BELOW,
298
        'H' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CARON,
299
        'H' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CEDILLA,
300
        'H' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX,
301
        'H' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DIAERESIS,
302
        'H' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DOT_ABOVE,
303
        'H' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DOT_BELOW,
304
        'I' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_I_WITH_ACUTE,
305
        'I' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_I_WITH_BREVE,
306
        'I' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_I_WITH_CARON,
307
        'I' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX,
308
        'I' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS,
309
        'I' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT          => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS_AND_ACUTE,
310
        'I' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE,
311
        'I' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DOT_BELOW,
312
        'I' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_I_WITH_GRAVE,
313
        'I' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_I_WITH_HOOK_ABOVE,
314
        'I' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_I_WITH_MACRON,
315
        'I' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_CAPITAL_LETTER_I_WITH_OGONEK,
316
        'I' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_I_WITH_TILDE,
317
        'J' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX,
318
        'K' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_K_WITH_CARON,
319
        'K' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_K_WITH_CEDILLA,
320
        'K' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_K_WITH_ACUTE,
321
        'K' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_K_WITH_DOT_BELOW,
322
        'L' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_L_WITH_ACUTE,
323
        'L' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_L_WITH_CARON,
324
        'L' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_L_WITH_CEDILLA,
325
        'L' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW,
326
        'L' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW_AND_MACRON,
327
        'M' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_M_WITH_ACUTE,
328
        'M' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_M_WITH_DOT_ABOVE,
329
        'M' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_M_WITH_DOT_BELOW,
330
        'N' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_N_WITH_ACUTE,
331
        'N' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_N_WITH_CARON,
332
        'N' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_N_WITH_CEDILLA,
333
        'N' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_N_WITH_DOT_ABOVE,
334
        'N' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_N_WITH_DOT_BELOW,
335
        'N' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_N_WITH_GRAVE,
336
        'N' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_N_WITH_TILDE,
337
        'O' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_O_WITH_ACUTE,
338
        'O' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_O_WITH_BREVE,
339
        'O' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CARON,
340
        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX,
341
        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE,
342
        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW,
343
        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE,
344
        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
345
        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE,
346
        'O' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS,
347
        'O' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS_AND_MACRON,
348
        'O' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE,
349
        'O' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON,
350
        'O' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_BELOW,
351
        'O' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT                               => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE,
352
        'O' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_O_WITH_GRAVE,
353
        'O' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_O_WITH_HOOK_ABOVE,
354
        'O' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON,
355
        'O' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT             => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_ACUTE,
356
        'O' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT             => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_GRAVE,
357
        'O' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_CAPITAL_LETTER_O_WITH_OGONEK,
358
        'O' . UTF8::COMBINING_OGONEK . UTF8::COMBINING_MACRON                   => UTF8::LATIN_CAPITAL_LETTER_O_WITH_OGONEK_AND_MACRON,
359
        'O' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE,
360
        'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_ACUTE,
361
        'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_DIAERESIS                 => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_DIAERESIS,
362
        'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_MACRON                    => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_MACRON,
363
        'P' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_P_WITH_ACUTE,
364
        'P' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_P_WITH_DOT_ABOVE,
365
        'R' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_R_WITH_ACUTE,
366
        'R' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_R_WITH_CARON,
367
        'R' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_R_WITH_CEDILLA,
368
        'R' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_ABOVE,
369
        'R' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW,
370
        'R' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW_AND_MACRON,
371
        'S' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_S_WITH_ACUTE,
372
        'S' . UTF8::COMBINING_ACUTE_ACCENT . UTF8::COMBINING_DOT_ABOVE          => UTF8::LATIN_CAPITAL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE,
373
        'S' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CARON,
374
        'S' . UTF8::COMBINING_CARON . UTF8::COMBINING_DOT_ABOVE                 => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CARON_AND_DOT_ABOVE,
375
        'S' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CEDILLA,
376
        'S' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX,
377
        'S' . UTF8::COMBINING_COMMA_BELOW                                       => UTF8::LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW,
378
        'S' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_ABOVE,
379
        'S' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW,
380
        'S' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_DOT_ABOVE             => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE,
381
        'T' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_T_WITH_CARON,
382
        'T' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_T_WITH_CEDILLA,
383
        'T' . UTF8::COMBINING_COMMA_BELOW                                       => UTF8::LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW,
384
        'T' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_T_WITH_DOT_ABOVE,
385
        'T' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_T_WITH_DOT_BELOW,
386
        'U' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_U_WITH_ACUTE,
387
        'U' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_U_WITH_BREVE,
388
        'U' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_U_WITH_CARON,
389
        'U' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX,
390
        'U' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS,
391
        'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT          => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_ACUTE,
392
        'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_CARON                 => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_CARON,
393
        'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_GRAVE_ACCENT          => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_GRAVE,
394
        'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_MACRON,
395
        'U' . UTF8::COMBINING_DIAERESIS_BELOW                                   => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_BELOW,
396
        'U' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DOT_BELOW,
397
        'U' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT                               => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE,
398
        'U' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_U_WITH_GRAVE,
399
        'U' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_U_WITH_HOOK_ABOVE,
400
        'U' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_U_WITH_MACRON,
401
        'U' . UTF8::COMBINING_MACRON . UTF8::COMBINING_DIAERESIS                => UTF8::LATIN_CAPITAL_LETTER_U_WITH_MACRON_AND_DIAERESIS,
402
        'U' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_CAPITAL_LETTER_U_WITH_OGONEK,
403
        'U' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE,
404
        'U' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_U_WITH_TILDE,
405
        'U' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_CAPITAL_LETTER_U_WITH_TILDE_AND_ACUTE,
406
        'V' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_V_WITH_DOT_BELOW,
407
        'V' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_V_WITH_TILDE,
408
        'W' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_W_WITH_ACUTE,
409
        'W' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX,
410
        'W' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DIAERESIS,
411
        'W' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DOT_ABOVE,
412
        'W' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DOT_BELOW,
413
        'W' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_W_WITH_GRAVE,
414
        'X' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_X_WITH_DIAERESIS,
415
        'X' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_X_WITH_DOT_ABOVE,
416
        'Y' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_ACUTE,
417
        'Y' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX,
418
        'Y' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS,
419
        'Y' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DOT_ABOVE,
420
        'Y' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DOT_BELOW,
421
        'Y' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_GRAVE,
422
        'Y' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_HOOK_ABOVE,
423
        'Y' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_MACRON,
424
        'Y' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_TILDE,
425
        'Z' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_ACUTE,
426
        'Z' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_CARON,
427
        'Z' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_CIRCUMFLEX,
428
        'Z' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE,
429
        'Z' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_DOT_BELOW,
430
        'a' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_A_WITH_ACUTE,
431
        'a' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE,
432
        'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_ACUTE,
433
        'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_DOT_BELOW                 => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_DOT_BELOW,
434
        'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_GRAVE_ACCENT              => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_GRAVE,
435
        'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_HOOK_ABOVE                => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE,
436
        'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_TILDE                     => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_TILDE,
437
        'a' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_A_WITH_CARON,
438
        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX,
439
        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE,
440
        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW,
441
        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE,
442
        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
443
        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE,
444
        'a' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_A_WITH_DIAERESIS,
445
        'a' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_A_WITH_DIAERESIS_AND_MACRON,
446
        'a' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE,
447
        'a' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON,
448
        'a' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_BELOW,
449
        'a' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_A_WITH_GRAVE,
450
        'a' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_A_WITH_HOOK_ABOVE,
451
        'a' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_A_WITH_MACRON,
452
        'a' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_SMALL_LETTER_A_WITH_OGONEK,
453
        'a' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_ABOVE,
454
        'a' . UTF8::COMBINING_RING_ABOVE . UTF8::COMBINING_ACUTE_ACCENT         => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE,
455
        'a' . UTF8::COMBINING_RING_BELOW                                        => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_BELOW,
456
        'a' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_A_WITH_TILDE,
457
        'b' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_B_WITH_DOT_ABOVE,
458
        'b' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_B_WITH_DOT_BELOW,
459
        'c' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_C_WITH_ACUTE,
460
        'c' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_C_WITH_CARON,
461
        'c' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_C_WITH_CEDILLA,
462
        'c' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX,
463
        'c' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE,
464
        'c' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_C_WITH_CEDILLA_AND_ACUTE,
465
        'd' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_D_WITH_CARON,
466
        'd' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_D_WITH_CEDILLA,
467
        'd' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_D_WITH_DOT_ABOVE,
468
        'd' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_D_WITH_DOT_BELOW,
469
        'e' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_E_WITH_ACUTE,
470
        'e' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_E_WITH_BREVE,
471
        'e' . UTF8::COMBINING_BREVE . UTF8::COMBINING_CEDILLA                   => UTF8::LATIN_SMALL_LETTER_E_WITH_CEDILLA_AND_BREVE,
472
        'e' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_E_WITH_CARON,
473
        'e' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_E_WITH_CEDILLA,
474
        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX,
475
        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE,
476
        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW,
477
        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE,
478
        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
479
        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE,
480
        'e' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_E_WITH_DIAERESIS,
481
        'e' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE,
482
        'e' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_E_WITH_DOT_BELOW,
483
        'e' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_E_WITH_GRAVE,
484
        'e' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_E_WITH_HOOK_ABOVE,
485
        'e' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON,
486
        'e' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT             => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON_AND_ACUTE,
487
        'e' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT             => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON_AND_GRAVE,
488
        'e' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_SMALL_LETTER_E_WITH_OGONEK,
489
        'e' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_E_WITH_TILDE,
490
        'f' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_F_WITH_DOT_ABOVE,
491
        'g' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_G_WITH_ACUTE,
492
        'g' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_G_WITH_BREVE,
493
        'g' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_G_WITH_CARON,
494
        'g' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_G_WITH_CEDILLA,
495
        'g' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX,
496
        'g' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE,
497
        'g' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_G_WITH_MACRON,
498
        'h' . UTF8::COMBINING_BREVE_BELOW                                       => UTF8::LATIN_SMALL_LETTER_H_WITH_BREVE_BELOW,
499
        'h' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_H_WITH_CARON,
500
        'h' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_H_WITH_CEDILLA,
501
        'h' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX,
502
        'h' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_H_WITH_DIAERESIS,
503
        'h' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_H_WITH_DOT_ABOVE,
504
        'h' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_H_WITH_DOT_BELOW,
505
        'i' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_I_WITH_ACUTE,
506
        'i' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_I_WITH_BREVE,
507
        'i' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_I_WITH_CARON,
508
        'i' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX,
509
        'i' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_I_WITH_DIAERESIS,
510
        'i' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT          => UTF8::LATIN_SMALL_LETTER_I_WITH_DIAERESIS_AND_ACUTE,
511
        'i' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_I_WITH_DOT_BELOW,
512
        'i' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_I_WITH_GRAVE,
513
        'i' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_I_WITH_HOOK_ABOVE,
514
        'i' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_I_WITH_MACRON,
515
        'i' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_SMALL_LETTER_I_WITH_OGONEK,
516
        'i' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_I_WITH_TILDE,
517
        'j' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_J_WITH_CARON,
518
        'j' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX,
519
        'k' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_K_WITH_CARON,
520
        'k' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_K_WITH_CEDILLA,
521
        'k' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_K_WITH_ACUTE,
522
        'k' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_K_WITH_DOT_BELOW,
523
        'l' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_L_WITH_ACUTE,
524
        'l' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_L_WITH_CARON,
525
        'l' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_L_WITH_CEDILLA,
526
        'l' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_L_WITH_DOT_BELOW,
527
        'l' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_L_WITH_DOT_BELOW_AND_MACRON,
528
        'm' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_M_WITH_ACUTE,
529
        'm' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_M_WITH_DOT_ABOVE,
530
        'm' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_M_WITH_DOT_BELOW,
531
        'n' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_N_WITH_ACUTE,
532
        'n' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_N_WITH_CARON,
533
        'n' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_N_WITH_CEDILLA,
534
        'n' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_N_WITH_DOT_ABOVE,
535
        'n' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_N_WITH_DOT_BELOW,
536
        'n' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_N_WITH_GRAVE,
537
        'n' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_N_WITH_TILDE,
538
        'o' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_O_WITH_ACUTE,
539
        'o' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_O_WITH_BREVE,
540
        'o' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_O_WITH_CARON,
541
        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX,
542
        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE,
543
        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW,
544
        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE,
545
        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
546
        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE,
547
        'o' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_O_WITH_DIAERESIS,
548
        'o' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_O_WITH_DIAERESIS_AND_MACRON,
549
        'o' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE,
550
        'o' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON,
551
        'o' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_BELOW,
552
        'o' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT                               => UTF8::LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE,
553
        'o' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_O_WITH_GRAVE,
554
        'o' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_O_WITH_HOOK_ABOVE,
555
        'o' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON,
556
        'o' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT             => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON_AND_ACUTE,
557
        'o' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT             => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON_AND_GRAVE,
558
        'o' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_SMALL_LETTER_O_WITH_OGONEK,
559
        'o' . UTF8::COMBINING_OGONEK . UTF8::COMBINING_MACRON                   => UTF8::LATIN_SMALL_LETTER_O_WITH_OGONEK_AND_MACRON,
560
        'o' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE,
561
        'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_ACUTE,
562
        'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_DIAERESIS                 => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_DIAERESIS,
563
        'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_MACRON                    => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_MACRON,
564
        'p' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_P_WITH_ACUTE,
565
        'p' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_P_WITH_DOT_ABOVE,
566
        'r' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_R_WITH_ACUTE,
567
        'r' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_R_WITH_CARON,
568
        'r' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_R_WITH_CEDILLA,
569
        'r' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_ABOVE,
570
        'r' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_BELOW,
571
        'r' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_BELOW_AND_MACRON,
572
        's' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_S_WITH_ACUTE,
573
        's' . UTF8::COMBINING_ACUTE_ACCENT . UTF8::COMBINING_DOT_ABOVE          => UTF8::LATIN_SMALL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE,
574
        's' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_S_WITH_CARON,
575
        's' . UTF8::COMBINING_CARON . UTF8::COMBINING_DOT_ABOVE                 => UTF8::LATIN_SMALL_LETTER_S_WITH_CARON_AND_DOT_ABOVE,
576
        's' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_S_WITH_CEDILLA,
577
        's' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX,
578
        's' . UTF8::COMBINING_COMMA_BELOW                                       => UTF8::LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW,
579
        's' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_ABOVE,
580
        's' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_BELOW,
581
        's' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_DOT_ABOVE             => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE,
582
        't' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_T_WITH_CARON,
583
        't' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_T_WITH_CEDILLA,
584
        't' . UTF8::COMBINING_COMMA_BELOW                                       => UTF8::LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW,
585
        't' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_T_WITH_DIAERESIS,
586
        't' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_T_WITH_DOT_ABOVE,
587
        't' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_T_WITH_DOT_BELOW,
588
        'u' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_U_WITH_ACUTE,
589
        'u' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_U_WITH_BREVE,
590
        'u' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_U_WITH_CARON,
591
        'u' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX,
592
        'u' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS,
593
        'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT          => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_ACUTE,
594
        'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_CARON                 => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_CARON,
595
        'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_GRAVE_ACCENT          => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_GRAVE,
596
        'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_MACRON,
597
        'u' . UTF8::COMBINING_DIAERESIS_BELOW                                   => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_BELOW,
598
        'u' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_U_WITH_DOT_BELOW,
599
        'u' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT                               => UTF8::LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE,
600
        'u' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_U_WITH_GRAVE,
601
        'u' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_U_WITH_HOOK_ABOVE,
602
        'u' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_U_WITH_MACRON,
603
        'u' . UTF8::COMBINING_MACRON . UTF8::COMBINING_DIAERESIS                => UTF8::LATIN_SMALL_LETTER_U_WITH_MACRON_AND_DIAERESIS,
604
        'u' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_SMALL_LETTER_U_WITH_OGONEK,
605
        'u' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_U_WITH_RING_ABOVE,
606
        'u' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_U_WITH_TILDE,
607
        'u' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_SMALL_LETTER_U_WITH_TILDE_AND_ACUTE,
608
        'v' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_V_WITH_DOT_BELOW,
609
        'v' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_V_WITH_TILDE,
610
        'w' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_W_WITH_ACUTE,
611
        'w' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX,
612
        'w' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_W_WITH_DIAERESIS,
613
        'w' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_W_WITH_DOT_ABOVE,
614
        'w' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_W_WITH_DOT_BELOW,
615
        'w' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_W_WITH_GRAVE,
616
        'w' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_W_WITH_RING_ABOVE,
617
        'x' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_X_WITH_DIAERESIS,
618
        'x' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_X_WITH_DOT_ABOVE,
619
        'y' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_Y_WITH_ACUTE,
620
        'y' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX,
621
        'y' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS,
622
        'y' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_Y_WITH_DOT_ABOVE,
623
        'y' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_Y_WITH_DOT_BELOW,
624
        'y' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_Y_WITH_GRAVE,
625
        'y' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_Y_WITH_HOOK_ABOVE,
626
        'y' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_Y_WITH_MACRON,
627
        'y' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_Y_WITH_RING_ABOVE,
628
        'y' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_Y_WITH_TILDE,
629
        'z' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_Z_WITH_ACUTE,
630
        'z' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_Z_WITH_CARON,
631
        'z' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_Z_WITH_CIRCUMFLEX,
632
        'z' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE,
633
        'z' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_Z_WITH_DOT_BELOW,
634
        UTF8::LATIN_CAPITAL_LETTER_AE . UTF8::COMBINING_ACUTE_ACCENT            => UTF8::LATIN_CAPITAL_LETTER_AE_WITH_ACUTE,
635
        UTF8::LATIN_CAPITAL_LETTER_AE . UTF8::COMBINING_MACRON                  => UTF8::LATIN_CAPITAL_LETTER_AE_WITH_MACRON,
636
        UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE_AND_ACUTE,
637
        UTF8::LATIN_SMALL_LETTER_AE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_SMALL_LETTER_AE_WITH_ACUTE,
638
        UTF8::LATIN_SMALL_LETTER_AE . UTF8::COMBINING_MACRON                    => UTF8::LATIN_SMALL_LETTER_AE_WITH_MACRON,
639
        UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE . UTF8::COMBINING_ACUTE_ACCENT   => UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE_AND_ACUTE,
640
    ];
641
642
    /**
     * First pass of the horn workaround applied by fromUtf8().
     *
     * ANSEL supports O and U with a horn diacritic, but not the combining diacritic.
     * Each "letter + combining horn" pair is therefore swapped for a NUL-delimited
     * placeholder, which HORN_CONVERT_STEP_2 later turns into the matching ANSEL byte.
     */
    private const HORN_CONVERT_STEP_1 = [
        'O' . UTF8::COMBINING_HORN => "\x00O_WITH_HORN\x00",
        'U' . UTF8::COMBINING_HORN => "\x00U_WITH_HORN\x00",
        'o' . UTF8::COMBINING_HORN => "\x00o_WITH_HORN\x00",
        'u' . UTF8::COMBINING_HORN => "\x00u_WITH_HORN\x00",
    ];
649
    /**
     * Second pass of the horn workaround applied by fromUtf8().
     *
     * Replaces each placeholder written by HORN_CONVERT_STEP_1 with the single
     * byte (AC, AD, BC or BD) that stands for the corresponding horned letter.
     */
    private const HORN_CONVERT_STEP_2 = [
        "\x00O_WITH_HORN\x00" => "\xAC",
        "\x00U_WITH_HORN\x00" => "\xAD",
        "\x00o_WITH_HORN\x00" => "\xBC",
        "\x00u_WITH_HORN\x00" => "\xBD",
    ];
655
656
    /**
     * Decode ANSEL-encoded text into UTF-8.
     *
     * @param string $text ANSEL-encoded input
     *
     * @return string UTF-8 output, using pre-composed characters where
     *                PRECOMPOSED_CHARACTERS provides one
     */
    public function toUtf8(string $text): string
    {
        // ANSEL writes diacritics (bytes E0-FF) before the character they modify,
        // while UTF-8 combining marks come after it: move each run of diacritic
        // bytes behind the byte that follows it.
        $reordered = preg_replace('/([\xE0-\xFF]+)(.)/', '$2$1', $text);

        // Plain table substitution; the result is still denormalized UTF-8.
        $denormalized = strtr($reordered, self::TO_UTF8);

        // Fold "base + combining diacritic" sequences into pre-composed characters.
        return strtr($denormalized, self::PRECOMPOSED_CHARACTERS);
    }
674
675
    /**
     * Convert a string from UTF-8 to ANSEL.
     *
     * @param string $text UTF-8 input
     *
     * @return string ANSEL-encoded output
     */
    public function fromUtf8(string $text): string
    {
        // The reverse lookup table is derived from a constant and never changes,
        // so build it once instead of flipping the whole table on every call.
        /** @var array<string,string>|null $decompositions */
        static $decompositions = null;

        if ($decompositions === null) {
            $decompositions = array_flip(self::PRECOMPOSED_CHARACTERS);
        }

        // Convert pre-composed characters into combining diacritics.
        $text = strtr($text, $decompositions);

        // ANSEL supports letters with horns, but not the combining horn.
        // Swap each "letter + combining horn" pair for a placeholder before
        // characters and diacritics are converted individually.
        $text = strtr($text, self::HORN_CONVERT_STEP_1);

        // Convert characters and combining diacritics separately.
        $text = parent::fromUtf8($text);

        // Replace the horn placeholders with the ANSEL bytes for the horned letters.
        // This must happen before the reordering below, so that any diacritic
        // following a horned letter is moved in front of its final single byte.
        $text = strtr($text, self::HORN_CONVERT_STEP_2);

        // ANSEL diacritics are prefixes.  UTF-8 diacritics are suffixes.
        return preg_replace('/([^\xE0-\xFF])([\xE0-\xFF]+)/', '$2$1', $text);
    }
701
}
702