1 | <?php |
||
2 | /* |
||
3 | * File: EncodingAliases.php |
||
4 | * Category: - |
||
5 | * Author: S. Todorov (https://github.com/todorowww) |
||
6 | * Created: 23.04.18 14:16 |
||
7 | * Updated: - |
||
8 | * |
||
9 | * Description: |
||
10 | * Contains email encoding aliases that can occur when fetching emails. These can sometimes break iconv(). |
||
11 | * This file attempts to correct this by using a list of aliases and their mappings to supported iconv() encodings |
||
12 | */ |
||
13 | |||
14 | namespace Webklex\PHPIMAP; |
||
15 | |||
/**
 * Class EncodingAliases
 *
 * Static lookup table that maps charset names and aliases (as they may show
 * up in fetched emails) to canonical charset names. Use
 * EncodingAliases::get() to resolve a name.
 *
 * @package Webklex\PHPIMAP
 */
class EncodingAliases {

    /**
     * Contains email encoding mappings
     *
     * Keys are lower-case alias names, values are the canonical charset names.
     * PHP stores purely numeric string keys (e.g. "646") as integer keys;
     * looking them up with the numeric string still matches.
     *
     * @var array
     */
    private static $aliases = [
        /*
        |--------------------------------------------------------------------------
        | Email encoding aliases
        |--------------------------------------------------------------------------
        |
        | Email encoding aliases used to convert to iconv supported charsets
        |
        |
        | This Source Code Form is subject to the terms of the Mozilla Public
        | License, v. 2.0. If a copy of the MPL was not distributed with this
        | file, You can obtain one at http://mozilla.org/MPL/2.0/.
        |
        | This Original Code has been modified by IBM Corporation.
        | Modifications made by IBM described herein are
        | Copyright (c) International Business Machines
        | Corporation, 1999
        |
        | Modifications to Mozilla code or documentation
        | identified per MPL Section 3.3
        |
        | Date Modified by Description of modification
        | 12/09/1999 IBM Corp. Support for IBM codepages - 850,852,855,857,862,864
        |
        | Rule of this file:
        | 1. key should always be in lower case ascii so we can do case insensitive
        | comparison in the code faster.
        | 2. value should be the one used in unicode converter
        |
        | 3. If the charset is not used for document charset, but font charset
        | (e.g. XLFD charset- such as JIS x0201, JIS x0208), don't put here
        |
        */
        "ascii" => "us-ascii",
        "us-ascii" => "us-ascii",
        "ansi_x3.4-1968" => "us-ascii",
        "646" => "us-ascii",
        "iso-8859-1" => "ISO-8859-1",
        "iso-8859-2" => "ISO-8859-2",
        "iso-8859-3" => "ISO-8859-3",
        "iso-8859-4" => "ISO-8859-4",
        "iso-8859-5" => "ISO-8859-5",
        "iso-8859-6" => "ISO-8859-6",
        "iso-8859-6-i" => "ISO-8859-6-I",
        "iso-8859-6-e" => "ISO-8859-6-E",
        "iso-8859-7" => "ISO-8859-7",
        "iso-8859-8" => "ISO-8859-8",
        "iso-8859-8-i" => "ISO-8859-8-I",
        "iso-8859-8-e" => "ISO-8859-8-E",
        "iso-8859-9" => "ISO-8859-9",
        "iso-8859-10" => "ISO-8859-10",
        "iso-8859-11" => "ISO-8859-11",
        "iso-8859-13" => "ISO-8859-13",
        "iso-8859-14" => "ISO-8859-14",
        "iso-8859-15" => "ISO-8859-15",
        "iso-8859-16" => "ISO-8859-16",
        "iso-ir-111" => "ISO-IR-111",
        "iso-2022-cn" => "ISO-2022-CN",
        "iso-2022-cn-ext" => "ISO-2022-CN",
        "iso-2022-kr" => "ISO-2022-KR",
        "iso-2022-jp" => "ISO-2022-JP",
        "utf-16be" => "UTF-16BE",
        "utf-16le" => "UTF-16LE",
        "utf-16" => "UTF-16",
        "windows-1250" => "windows-1250",
        "windows-1251" => "windows-1251",
        "windows-1252" => "windows-1252",
        "windows-1253" => "windows-1253",
        "windows-1254" => "windows-1254",
        "windows-1255" => "windows-1255",
        "windows-1256" => "windows-1256",
        "windows-1257" => "windows-1257",
        "windows-1258" => "windows-1258",
        "ibm866" => "IBM866",
        "ibm850" => "IBM850",
        "ibm852" => "IBM852",
        "ibm855" => "IBM855",
        "ibm857" => "IBM857",
        "ibm862" => "IBM862",
        "ibm864" => "IBM864",
        "utf-8" => "UTF-8",
        "utf-7" => "UTF-7",
        "shift_jis" => "Shift_JIS",
        "big5" => "Big5",
        "euc-jp" => "EUC-JP",
        "euc-kr" => "EUC-KR",
        "gb2312" => "GB2312",
        "gb18030" => "gb18030",
        "viscii" => "VISCII",
        "koi8-r" => "KOI8-R",
        "koi8_r" => "KOI8-R",
        "cskoi8r" => "KOI8-R",
        "koi" => "KOI8-R",
        "koi8" => "KOI8-R",
        "koi8-u" => "KOI8-U",
        "tis-620" => "TIS-620",
        "t.61-8bit" => "T.61-8bit",
        "hz-gb-2312" => "HZ-GB-2312",
        "big5-hkscs" => "Big5-HKSCS",
        "gbk" => "gbk",
        "cns11643" => "x-euc-tw",
        //
        // Aliases for ISO-8859-1
        // (the "iso8859-N" spellings of the other ISO-8859 parts are listed here as well)
        //
        "latin1" => "ISO-8859-1",
        "iso_8859-1" => "ISO-8859-1",
        "iso8859-1" => "ISO-8859-1",
        "iso8859-2" => "ISO-8859-2",
        "iso8859-3" => "ISO-8859-3",
        "iso8859-4" => "ISO-8859-4",
        "iso8859-5" => "ISO-8859-5",
        "iso8859-6" => "ISO-8859-6",
        "iso8859-7" => "ISO-8859-7",
        "iso8859-8" => "ISO-8859-8",
        "iso8859-9" => "ISO-8859-9",
        "iso8859-10" => "ISO-8859-10",
        "iso8859-11" => "ISO-8859-11",
        "iso8859-13" => "ISO-8859-13",
        "iso8859-14" => "ISO-8859-14",
        "iso8859-15" => "ISO-8859-15",
        "iso_8859-1:1987" => "ISO-8859-1",
        "iso-ir-100" => "ISO-8859-1",
        "l1" => "ISO-8859-1",
        "ibm819" => "ISO-8859-1",
        "cp819" => "ISO-8859-1",
        "csisolatin1" => "ISO-8859-1",
        //
        // Aliases for ISO-8859-2
        //
        "latin2" => "ISO-8859-2",
        "iso_8859-2" => "ISO-8859-2",
        "iso_8859-2:1987" => "ISO-8859-2",
        "iso-ir-101" => "ISO-8859-2",
        "l2" => "ISO-8859-2",
        "csisolatin2" => "ISO-8859-2",
        //
        // Aliases for ISO-8859-3
        //
        "latin3" => "ISO-8859-3",
        "iso_8859-3" => "ISO-8859-3",
        "iso_8859-3:1988" => "ISO-8859-3",
        "iso-ir-109" => "ISO-8859-3",
        "l3" => "ISO-8859-3",
        "csisolatin3" => "ISO-8859-3",
        //
        // Aliases for ISO-8859-4
        //
        "latin4" => "ISO-8859-4",
        "iso_8859-4" => "ISO-8859-4",
        "iso_8859-4:1988" => "ISO-8859-4",
        "iso-ir-110" => "ISO-8859-4",
        "l4" => "ISO-8859-4",
        "csisolatin4" => "ISO-8859-4",
        //
        // Aliases for ISO-8859-5
        //
        "cyrillic" => "ISO-8859-5",
        "iso_8859-5" => "ISO-8859-5",
        "iso_8859-5:1988" => "ISO-8859-5",
        "iso-ir-144" => "ISO-8859-5",
        "csisolatincyrillic" => "ISO-8859-5",
        //
        // Aliases for ISO-8859-6
        //
        "arabic" => "ISO-8859-6",
        "iso_8859-6" => "ISO-8859-6",
        "iso_8859-6:1987" => "ISO-8859-6",
        "iso-ir-127" => "ISO-8859-6",
        "ecma-114" => "ISO-8859-6",
        "asmo-708" => "ISO-8859-6",
        "csisolatinarabic" => "ISO-8859-6",
        //
        // Aliases for ISO-8859-6-I
        //
        "csiso88596i" => "ISO-8859-6-I",
        //
        // Aliases for ISO-8859-6-E
        //
        "csiso88596e" => "ISO-8859-6-E",
        //
        // Aliases for ISO-8859-7
        //
        "greek" => "ISO-8859-7",
        "greek8" => "ISO-8859-7",
        "sun_eu_greek" => "ISO-8859-7",
        "iso_8859-7" => "ISO-8859-7",
        "iso_8859-7:1987" => "ISO-8859-7",
        "iso-ir-126" => "ISO-8859-7",
        "elot_928" => "ISO-8859-7",
        "ecma-118" => "ISO-8859-7",
        "csisolatingreek" => "ISO-8859-7",
        //
        // Aliases for ISO-8859-8
        //
        "hebrew" => "ISO-8859-8",
        "iso_8859-8" => "ISO-8859-8",
        "visual" => "ISO-8859-8",
        "iso_8859-8:1988" => "ISO-8859-8",
        "iso-ir-138" => "ISO-8859-8",
        "csisolatinhebrew" => "ISO-8859-8",
        //
        // Aliases for ISO-8859-8-I
        //
        "csiso88598i" => "ISO-8859-8-I",
        "iso-8859-8i" => "ISO-8859-8-I",
        "logical" => "ISO-8859-8-I",
        //
        // Aliases for ISO-8859-8-E
        //
        "csiso88598e" => "ISO-8859-8-E",
        //
        // Aliases for ISO-8859-9
        //
        "latin5" => "ISO-8859-9",
        "iso_8859-9" => "ISO-8859-9",
        "iso_8859-9:1989" => "ISO-8859-9",
        "iso-ir-148" => "ISO-8859-9",
        "l5" => "ISO-8859-9",
        "csisolatin5" => "ISO-8859-9",
        //
        // Aliases for UTF-8
        //
        "unicode-1-1-utf-8" => "UTF-8",
        // nl_langinfo(CODESET) in HP/UX returns 'utf8' under UTF-8 locales
        "utf8" => "UTF-8",
        //
        // Aliases for Shift_JIS
        //
        "x-sjis" => "Shift_JIS",
        "shift-jis" => "Shift_JIS",
        "ms_kanji" => "Shift_JIS",
        "csshiftjis" => "Shift_JIS",
        "windows-31j" => "Shift_JIS",
        "cp932" => "Shift_JIS",
        "sjis" => "Shift_JIS",
        //
        // Aliases for EUC_JP
        //
        "cseucpkdfmtjapanese" => "EUC-JP",
        "x-euc-jp" => "EUC-JP",
        //
        // Aliases for ISO-2022-JP
        //
        "csiso2022jp" => "ISO-2022-JP",
        // The following are really not aliases ISO-2022-JP, but sharing the same decoder
        "iso-2022-jp-2" => "ISO-2022-JP",
        "csiso2022jp2" => "ISO-2022-JP",
        //
        // Aliases for Big5
        //
        "csbig5" => "Big5",
        "cn-big5" => "Big5",
        // x-x-big5 is not really a alias for Big5, add it only for MS FrontPage
        "x-x-big5" => "Big5",
        // Sun Solaris
        "zh_tw-big5" => "Big5",
        //
        // Aliases for EUC-KR
        //
        "cseuckr" => "EUC-KR",
        "ks_c_5601-1987" => "EUC-KR",
        "iso-ir-149" => "EUC-KR",
        "ks_c_5601-1989" => "EUC-KR",
        "ksc_5601" => "EUC-KR",
        "ksc5601" => "EUC-KR",
        "korean" => "EUC-KR",
        "csksc56011987" => "EUC-KR",
        "5601" => "EUC-KR",
        "windows-949" => "EUC-KR",
        //
        // Aliases for GB2312
        //
        // The following are really not aliases GB2312, add them only for MS FrontPage
        "gb_2312-80" => "GB2312",
        "iso-ir-58" => "GB2312",
        "chinese" => "GB2312",
        "csiso58gb231280" => "GB2312",
        "csgb2312" => "GB2312",
        "zh_cn.euc" => "GB2312",
        // Sun Solaris
        "gb_2312" => "GB2312",
        //
        // Aliases for windows-125x
        //
        "x-cp1250" => "windows-1250",
        "x-cp1251" => "windows-1251",
        "x-cp1252" => "windows-1252",
        "x-cp1253" => "windows-1253",
        "x-cp1254" => "windows-1254",
        "x-cp1255" => "windows-1255",
        "x-cp1256" => "windows-1256",
        "x-cp1257" => "windows-1257",
        "x-cp1258" => "windows-1258",
        //
        // Aliases for windows-874
        //
        "windows-874" => "windows-874",
        "ibm874" => "windows-874",
        "dos-874" => "windows-874",
        //
        // Aliases for macintosh
        //
        "macintosh" => "macintosh",
        "x-mac-roman" => "macintosh",
        "mac" => "macintosh",
        "csmacintosh" => "macintosh",
        //
        // Aliases for IBM866
        //
        "cp866" => "IBM866",
        "cp-866" => "IBM866",
        "866" => "IBM866",
        "csibm866" => "IBM866",
        //
        // Aliases for IBM850
        //
        "cp850" => "IBM850",
        "850" => "IBM850",
        "csibm850" => "IBM850",
        //
        // Aliases for IBM852
        //
        "cp852" => "IBM852",
        "852" => "IBM852",
        "csibm852" => "IBM852",
        //
        // Aliases for IBM855
        //
        "cp855" => "IBM855",
        "855" => "IBM855",
        "csibm855" => "IBM855",
        //
        // Aliases for IBM857
        //
        "cp857" => "IBM857",
        "857" => "IBM857",
        "csibm857" => "IBM857",
        //
        // Aliases for IBM862
        //
        "cp862" => "IBM862",
        "862" => "IBM862",
        "csibm862" => "IBM862",
        //
        // Aliases for IBM864
        //
        "cp864" => "IBM864",
        "864" => "IBM864",
        "csibm864" => "IBM864",
        "ibm-864" => "IBM864",
        //
        // Aliases for T.61-8bit
        //
        "t.61" => "T.61-8bit",
        "iso-ir-103" => "T.61-8bit",
        "csiso103t618bit" => "T.61-8bit",
        //
        // Aliases for UTF-7
        //
        "x-unicode-2-0-utf-7" => "UTF-7",
        "unicode-2-0-utf-7" => "UTF-7",
        "unicode-1-1-utf-7" => "UTF-7",
        "csunicode11utf7" => "UTF-7",
        //
        // Aliases for ISO-10646-UCS-2
        //
        "csunicode" => "UTF-16BE",
        "csunicode11" => "UTF-16BE",
        "iso-10646-ucs-basic" => "UTF-16BE",
        "csunicodeascii" => "UTF-16BE",
        "iso-10646-unicode-latin1" => "UTF-16BE",
        "csunicodelatin1" => "UTF-16BE",
        "iso-10646" => "UTF-16BE",
        "iso-10646-j-1" => "UTF-16BE",
        //
        // Aliases for ISO-8859-10
        //
        "latin6" => "ISO-8859-10",
        "iso-ir-157" => "ISO-8859-10",
        "l6" => "ISO-8859-10",
        // This entry was disabled in the original list because .properties files
        // cannot handle ':' in a key; a PHP array key has no such restriction.
        "iso_8859-10:1992" => "ISO-8859-10",
        "csisolatin6" => "ISO-8859-10",
        //
        // Aliases for ISO-8859-15
        //
        "iso_8859-15" => "ISO-8859-15",
        "csisolatin9" => "ISO-8859-15",
        "l9" => "ISO-8859-15",
        //
        // Aliases for ISO-IR-111
        //
        "ecma-cyrillic" => "ISO-IR-111",
        "csiso111ecmacyrillic" => "ISO-IR-111",
        //
        // Aliases for ISO-2022-KR
        //
        "csiso2022kr" => "ISO-2022-KR",
        //
        // Aliases for VISCII
        //
        "csviscii" => "VISCII",
        //
        // Aliases for x-euc-tw
        //
        "zh_tw-euc" => "x-euc-tw",
        //
        // Following names appears in unix nl_langinfo(CODESET)
        // They can be compiled as platform specific if necessary
        // DONT put things here if it does not look generic enough (like hp15CN)
        //
        "iso88591" => "ISO-8859-1",
        "iso88592" => "ISO-8859-2",
        "iso88593" => "ISO-8859-3",
        "iso88594" => "ISO-8859-4",
        "iso88595" => "ISO-8859-5",
        "iso88596" => "ISO-8859-6",
        "iso88597" => "ISO-8859-7",
        "iso88598" => "ISO-8859-8",
        "iso88599" => "ISO-8859-9",
        "iso885910" => "ISO-8859-10",
        "iso885911" => "ISO-8859-11",
        // NOTE(review): no other entry in this table refers to ISO-8859-12, and it is
        // presumably not a charset iconv() knows - confirm whether this should stay.
        "iso885912" => "ISO-8859-12",
        "iso885913" => "ISO-8859-13",
        "iso885914" => "ISO-8859-14",
        "iso885915" => "ISO-8859-15",
        "cp1250" => "windows-1250",
        "cp1251" => "windows-1251",
        "cp1252" => "windows-1252",
        "cp1253" => "windows-1253",
        "cp1254" => "windows-1254",
        "cp1255" => "windows-1255",
        "cp1256" => "windows-1256",
        "cp1257" => "windows-1257",
        "cp1258" => "windows-1258",
        "x-gbk" => "gbk",
        "windows-936" => "gbk",
        "ansi-1251" => "windows-1251",
    ];

    /**
     * Returns the proper encoding mapping, if it exists.
     *
     * The lookup is case-insensitive: $encoding is lower-cased before it is
     * matched against the alias table. When no alias matches, $fallback is
     * returned if it is not null; otherwise $encoding is returned unchanged.
     * If $encoding is null as well, an empty string is returned so that the
     * declared string return type always holds.
     *
     * The implicitly nullable "string $fallback = null" signature is kept as-is
     * so the method stays compatible with existing callers and PHP versions.
     *
     * @param string|null $encoding Charset name or alias to resolve
     * @param string|null $fallback Value to return when $encoding is not a known alias
     *
     * @return string
     */
    public static function get($encoding, string $fallback = null): string {
        // Normalize once; a null $encoding is looked up as '' (which has no entry).
        $key = strtolower($encoding ?? '');

        if (isset(self::$aliases[$key])) {
            return self::$aliases[$key];
        }

        // Unknown alias: prefer the fallback, then the untouched input. The final
        // '' guards against returning null, which would violate the return type.
        return $fallback ?? $encoding ?? '';
    }

}
||
483 |