EncodingAliases   A
last analyzed

Complexity

Total Complexity 3

Size/Duplication

Total Lines 459
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 3
eloc 280
c 1
b 0
f 0
dl 0
loc 459
rs 10

1 Method

Rating   Name   Duplication   Size   Complexity  
A get() 0 5 3
1
<?php
2
/*
3
* File:     EncodingAliases.php
4
* Category: -
5
* Author:   S. Todorov (https://github.com/todorowww)
6
* Created:  23.04.18 14:16
7
* Updated:  -
8
*
9
* Description:
10
*  Contains email encoding aliases that can occur when fetching emails. These can sometimes break iconv().
11
*  This file attempts to correct this by using a list of aliases and their mappings to supported iconv() encodings
12
*/
13
14
namespace Webklex\PHPIMAP;
15
16
/**
17
 * Class EncodingAliases
18
 *
19
 * @package Webklex\PHPIMAP
20
 */
21
class EncodingAliases {
22
   
23
    /**
24
     * Contains email encoding mappings
25
     *
26
     * @var array
27
     */
28
    private static $aliases = [
29
        /*
30
        |--------------------------------------------------------------------------
31
        | Email encoding aliases
32
        |--------------------------------------------------------------------------
33
        |
34
        | Email encoding aliases used to convert to iconv supported charsets
35
        |
36
        |
37
        | This Source Code Form is subject to the terms of the Mozilla Public
38
        | License, v. 2.0. If a copy of the MPL was not distributed with this
39
        | file, You can obtain one at http://mozilla.org/MPL/2.0/.
40
        |
41
        | This Original Code has been modified by IBM Corporation.
42
        | Modifications made by IBM described herein are
43
        | Copyright (c) International Business Machines
44
        | Corporation, 1999
45
        |
46
        | Modifications to Mozilla code or documentation
47
        | identified per MPL Section 3.3
48
        |
49
        | Date         Modified by     Description of modification
50
        | 12/09/1999   IBM Corp.       Support for IBM codepages - 850,852,855,857,862,864
51
        |
52
        | Rule of this file:
53
        | 1. key should always be in lower case ascii so we can do case insensitive
54
        |    comparison in the code faster.
55
        | 2. value should be the one used in unicode converter
56
        |
57
        | 3. If the charset is not used for document charset, but font charset
58
        |    (e.g. XLFD charset- such as JIS x0201, JIS x0208), don't put here
59
        |
60
        */
61
        "ascii"                    => "us-ascii",
62
        "us-ascii"                 => "us-ascii",
63
        "ansi_x3.4-1968"           => "us-ascii",
64
        "646"                      => "us-ascii",
65
        "iso-8859-1"               => "ISO-8859-1",
66
        "iso-8859-2"               => "ISO-8859-2",
67
        "iso-8859-3"               => "ISO-8859-3",
68
        "iso-8859-4"               => "ISO-8859-4",
69
        "iso-8859-5"               => "ISO-8859-5",
70
        "iso-8859-6"               => "ISO-8859-6",
71
        "iso-8859-6-i"             => "ISO-8859-6-I",
72
        "iso-8859-6-e"             => "ISO-8859-6-E",
73
        "iso-8859-7"               => "ISO-8859-7",
74
        "iso-8859-8"               => "ISO-8859-8",
75
        "iso-8859-8-i"             => "ISO-8859-8-I",
76
        "iso-8859-8-e"             => "ISO-8859-8-E",
77
        "iso-8859-9"               => "ISO-8859-9",
78
        "iso-8859-10"              => "ISO-8859-10",
79
        "iso-8859-11"              => "ISO-8859-11",
80
        "iso-8859-13"              => "ISO-8859-13",
81
        "iso-8859-14"              => "ISO-8859-14",
82
        "iso-8859-15"              => "ISO-8859-15",
83
        "iso-8859-16"              => "ISO-8859-16",
84
        "iso-ir-111"               => "ISO-IR-111",
85
        "iso-2022-cn"              => "ISO-2022-CN",
86
        "iso-2022-cn-ext"          => "ISO-2022-CN",
87
        "iso-2022-kr"              => "ISO-2022-KR",
88
        "iso-2022-jp"              => "ISO-2022-JP",
89
        "utf-16be"                 => "UTF-16BE",
90
        "utf-16le"                 => "UTF-16LE",
91
        "utf-16"                   => "UTF-16",
92
        "windows-1250"             => "windows-1250",
93
        "windows-1251"             => "windows-1251",
94
        "windows-1252"             => "windows-1252",
95
        "windows-1253"             => "windows-1253",
96
        "windows-1254"             => "windows-1254",
97
        "windows-1255"             => "windows-1255",
98
        "windows-1256"             => "windows-1256",
99
        "windows-1257"             => "windows-1257",
100
        "windows-1258"             => "windows-1258",
101
        "ibm866"                   => "IBM866",
102
        "ibm850"                   => "IBM850",
103
        "ibm852"                   => "IBM852",
104
        "ibm855"                   => "IBM855",
105
        "ibm857"                   => "IBM857",
106
        "ibm862"                   => "IBM862",
107
        "ibm864"                   => "IBM864",
108
        "utf-8"                    => "UTF-8",
109
        "utf-7"                    => "UTF-7",
110
        "shift_jis"                => "Shift_JIS",
111
        "big5"                     => "Big5",
112
        "euc-jp"                   => "EUC-JP",
113
        "euc-kr"                   => "EUC-KR",
114
        "gb2312"                   => "GB2312",
115
        "gb18030"                  => "gb18030",
116
        "viscii"                   => "VISCII",
117
        "koi8-r"                   => "KOI8-R",
118
        "koi8_r"                   => "KOI8-R",
119
        "cskoi8r"                  => "KOI8-R",
120
        "koi"                      => "KOI8-R",
121
        "koi8"                     => "KOI8-R",
122
        "koi8-u"                   => "KOI8-U",
123
        "tis-620"                  => "TIS-620",
124
        "t.61-8bit"                => "T.61-8bit",
125
        "hz-gb-2312"               => "HZ-GB-2312",
126
        "big5-hkscs"               => "Big5-HKSCS",
127
        "gbk"                      => "gbk",
128
        "cns11643"                 => "x-euc-tw",
129
        //
130
        // Aliases for ISO-8859-1
131
        //
132
        "latin1"                   => "ISO-8859-1",
133
        "iso_8859-1"               => "ISO-8859-1",
134
        "iso8859-1"                => "ISO-8859-1",
135
        "iso8859-2"                => "ISO-8859-2",
136
        "iso8859-3"                => "ISO-8859-3",
137
        "iso8859-4"                => "ISO-8859-4",
138
        "iso8859-5"                => "ISO-8859-5",
139
        "iso8859-6"                => "ISO-8859-6",
140
        "iso8859-7"                => "ISO-8859-7",
141
        "iso8859-8"                => "ISO-8859-8",
142
        "iso8859-9"                => "ISO-8859-9",
143
        "iso8859-10"               => "ISO-8859-10",
144
        "iso8859-11"               => "ISO-8859-11",
145
        "iso8859-13"               => "ISO-8859-13",
146
        "iso8859-14"               => "ISO-8859-14",
147
        "iso8859-15"               => "ISO-8859-15",
148
        "iso_8859-1:1987"          => "ISO-8859-1",
149
        "iso-ir-100"               => "ISO-8859-1",
150
        "l1"                       => "ISO-8859-1",
151
        "ibm819"                   => "ISO-8859-1",
152
        "cp819"                    => "ISO-8859-1",
153
        "csisolatin1"              => "ISO-8859-1",
154
        //
155
        // Aliases for ISO-8859-2
156
        //
157
        "latin2"                   => "ISO-8859-2",
158
        "iso_8859-2"               => "ISO-8859-2",
159
        "iso_8859-2:1987"          => "ISO-8859-2",
160
        "iso-ir-101"               => "ISO-8859-2",
161
        "l2"                       => "ISO-8859-2",
162
        "csisolatin2"              => "ISO-8859-2",
163
        //
164
        // Aliases for ISO-8859-3
165
        //
166
        "latin3"                   => "ISO-8859-3",
167
        "iso_8859-3"               => "ISO-8859-3",
168
        "iso_8859-3:1988"          => "ISO-8859-3",
169
        "iso-ir-109"               => "ISO-8859-3",
170
        "l3"                       => "ISO-8859-3",
171
        "csisolatin3"              => "ISO-8859-3",
172
        //
173
        // Aliases for ISO-8859-4
174
        //
175
        "latin4"                   => "ISO-8859-4",
176
        "iso_8859-4"               => "ISO-8859-4",
177
        "iso_8859-4:1988"          => "ISO-8859-4",
178
        "iso-ir-110"               => "ISO-8859-4",
179
        "l4"                       => "ISO-8859-4",
180
        "csisolatin4"              => "ISO-8859-4",
181
        //
182
        // Aliases for ISO-8859-5
183
        //
184
        "cyrillic"                 => "ISO-8859-5",
185
        "iso_8859-5"               => "ISO-8859-5",
186
        "iso_8859-5:1988"          => "ISO-8859-5",
187
        "iso-ir-144"               => "ISO-8859-5",
188
        "csisolatincyrillic"       => "ISO-8859-5",
189
        //
190
        // Aliases for ISO-8859-6
191
        //
192
        "arabic"                   => "ISO-8859-6",
193
        "iso_8859-6"               => "ISO-8859-6",
194
        "iso_8859-6:1987"          => "ISO-8859-6",
195
        "iso-ir-127"               => "ISO-8859-6",
196
        "ecma-114"                 => "ISO-8859-6",
197
        "asmo-708"                 => "ISO-8859-6",
198
        "csisolatinarabic"         => "ISO-8859-6",
199
        //
200
        // Aliases for ISO-8859-6-I
201
        //
202
        "csiso88596i"              => "ISO-8859-6-I",
203
        //
204
        // Aliases for ISO-8859-6-E",
205
        //
206
        "csiso88596e"              => "ISO-8859-6-E",
207
        //
208
        // Aliases for ISO-8859-7",
209
        //
210
        "greek"                    => "ISO-8859-7",
211
        "greek8"                   => "ISO-8859-7",
212
        "sun_eu_greek"             => "ISO-8859-7",
213
        "iso_8859-7"               => "ISO-8859-7",
214
        "iso_8859-7:1987"          => "ISO-8859-7",
215
        "iso-ir-126"               => "ISO-8859-7",
216
        "elot_928"                 => "ISO-8859-7",
217
        "ecma-118"                 => "ISO-8859-7",
218
        "csisolatingreek"          => "ISO-8859-7",
219
        //
220
        // Aliases for ISO-8859-8",
221
        //
222
        "hebrew"                   => "ISO-8859-8",
223
        "iso_8859-8"               => "ISO-8859-8",
224
        "visual"                   => "ISO-8859-8",
225
        "iso_8859-8:1988"          => "ISO-8859-8",
226
        "iso-ir-138"               => "ISO-8859-8",
227
        "csisolatinhebrew"         => "ISO-8859-8",
228
        //
229
        // Aliases for ISO-8859-8-I",
230
        //
231
        "csiso88598i"              => "ISO-8859-8-I",
232
        "iso-8859-8i"              => "ISO-8859-8-I",
233
        "logical"                  => "ISO-8859-8-I",
234
        //
235
        // Aliases for ISO-8859-8-E",
236
        //
237
        "csiso88598e"              => "ISO-8859-8-E",
238
        //
239
        // Aliases for ISO-8859-9",
240
        //
241
        "latin5"                   => "ISO-8859-9",
242
        "iso_8859-9"               => "ISO-8859-9",
243
        "iso_8859-9:1989"          => "ISO-8859-9",
244
        "iso-ir-148"               => "ISO-8859-9",
245
        "l5"                       => "ISO-8859-9",
246
        "csisolatin5"              => "ISO-8859-9",
247
        //
248
        // Aliases for UTF-8",
249
        //
250
        "unicode-1-1-utf-8"        => "UTF-8",
251
        // nl_langinfo(CODESET) in HP/UX returns 'utf8' under UTF-8 locales",
252
        "utf8"                     => "UTF-8",
253
        //
254
        // Aliases for Shift_JIS",
255
        //
256
        "x-sjis"                   => "Shift_JIS",
257
        "shift-jis"                => "Shift_JIS",
258
        "ms_kanji"                 => "Shift_JIS",
259
        "csshiftjis"               => "Shift_JIS",
260
        "windows-31j"              => "Shift_JIS",
261
        "cp932"                    => "Shift_JIS",
262
        "sjis"                     => "Shift_JIS",
263
        //
264
        // Aliases for EUC_JP",
265
        //
266
        "cseucpkdfmtjapanese"      => "EUC-JP",
267
        "x-euc-jp"                 => "EUC-JP",
268
        //
269
        // Aliases for ISO-2022-JP",
270
        //
271
        "csiso2022jp"              => "ISO-2022-JP",
272
        // The following are really not aliases ISO-2022-JP, but sharing the same decoder",
273
        "iso-2022-jp-2"            => "ISO-2022-JP",
274
        "csiso2022jp2"             => "ISO-2022-JP",
275
        //
276
        // Aliases for Big5",
277
        //
278
        "csbig5"                   => "Big5",
279
        "cn-big5"                  => "Big5",
280
        // x-x-big5 is not really a alias for Big5, add it only for MS FrontPage",
281
        "x-x-big5"                 => "Big5",
282
        // Sun Solaris",
283
        "zh_tw-big5"               => "Big5",
284
        //
285
        // Aliases for EUC-KR",
286
        //
287
        "cseuckr"                  => "EUC-KR",
288
        "ks_c_5601-1987"           => "EUC-KR",
289
        "iso-ir-149"               => "EUC-KR",
290
        "ks_c_5601-1989"           => "EUC-KR",
291
        "ksc_5601"                 => "EUC-KR",
292
        "ksc5601"                  => "EUC-KR",
293
        "korean"                   => "EUC-KR",
294
        "csksc56011987"            => "EUC-KR",
295
        "5601"                     => "EUC-KR",
296
        "windows-949"              => "EUC-KR",
297
        //
298
        // Aliases for GB2312",
299
        //
300
        // The following are really not aliases GB2312, add them only for MS FrontPage",
301
        "gb_2312-80"               => "GB2312",
302
        "iso-ir-58"                => "GB2312",
303
        "chinese"                  => "GB2312",
304
        "csiso58gb231280"          => "GB2312",
305
        "csgb2312"                 => "GB2312",
306
        "zh_cn.euc"                => "GB2312",
307
        // Sun Solaris",
308
        "gb_2312"                  => "GB2312",
309
        //
310
        // Aliases for windows-125x ",
311
        //
312
        "x-cp1250"                 => "windows-1250",
313
        "x-cp1251"                 => "windows-1251",
314
        "x-cp1252"                 => "windows-1252",
315
        "x-cp1253"                 => "windows-1253",
316
        "x-cp1254"                 => "windows-1254",
317
        "x-cp1255"                 => "windows-1255",
318
        "x-cp1256"                 => "windows-1256",
319
        "x-cp1257"                 => "windows-1257",
320
        "x-cp1258"                 => "windows-1258",
321
        //
322
        // Aliases for windows-874 ",
323
        //
324
        "windows-874"              => "windows-874",
325
        "ibm874"                   => "windows-874",
326
        "dos-874"                  => "windows-874",
327
        //
328
        // Aliases for macintosh",
329
        //
330
        "macintosh"                => "macintosh",
331
        "x-mac-roman"              => "macintosh",
332
        "mac"                      => "macintosh",
333
        "csmacintosh"              => "macintosh",
334
        //
335
        // Aliases for IBM866",
336
        //
337
        "cp866"                    => "IBM866",
338
        "cp-866"                   => "IBM866",
339
        "866"                      => "IBM866",
340
        "csibm866"                 => "IBM866",
341
        //
342
        // Aliases for IBM850",
343
        //
344
        "cp850"                    => "IBM850",
345
        "850"                      => "IBM850",
346
        "csibm850"                 => "IBM850",
347
        //
348
        // Aliases for IBM852",
349
        //
350
        "cp852"                    => "IBM852",
351
        "852"                      => "IBM852",
352
        "csibm852"                 => "IBM852",
353
        //
354
        // Aliases for IBM855",
355
        //
356
        "cp855"                    => "IBM855",
357
        "855"                      => "IBM855",
358
        "csibm855"                 => "IBM855",
359
        //
360
        // Aliases for IBM857",
361
        //
362
        "cp857"                    => "IBM857",
363
        "857"                      => "IBM857",
364
        "csibm857"                 => "IBM857",
365
        //
366
        // Aliases for IBM862",
367
        //
368
        "cp862"                    => "IBM862",
369
        "862"                      => "IBM862",
370
        "csibm862"                 => "IBM862",
371
        //
372
        // Aliases for IBM864",
373
        //
374
        "cp864"                    => "IBM864",
375
        "864"                      => "IBM864",
376
        "csibm864"                 => "IBM864",
377
        "ibm-864"                  => "IBM864",
378
        //
379
        // Aliases for T.61-8bit",
380
        //
381
        "t.61"                     => "T.61-8bit",
382
        "iso-ir-103"               => "T.61-8bit",
383
        "csiso103t618bit"          => "T.61-8bit",
384
        //
385
        // Aliases for UTF-7",
386
        //
387
        "x-unicode-2-0-utf-7"      => "UTF-7",
388
        "unicode-2-0-utf-7"        => "UTF-7",
389
        "unicode-1-1-utf-7"        => "UTF-7",
390
        "csunicode11utf7"          => "UTF-7",
391
        //
392
        // Aliases for ISO-10646-UCS-2",
393
        //
394
        "csunicode"                => "UTF-16BE",
395
        "csunicode11"              => "UTF-16BE",
396
        "iso-10646-ucs-basic"      => "UTF-16BE",
397
        "csunicodeascii"           => "UTF-16BE",
398
        "iso-10646-unicode-latin1" => "UTF-16BE",
399
        "csunicodelatin1"          => "UTF-16BE",
400
        "iso-10646"                => "UTF-16BE",
401
        "iso-10646-j-1"            => "UTF-16BE",
402
        //
403
        // Aliases for ISO-8859-10",
404
        //
405
        "latin6"                   => "ISO-8859-10",
406
        "iso-ir-157"               => "ISO-8859-10",
407
        "l6"                       => "ISO-8859-10",
408
        // Currently .properties cannot handle : in key",
409
        //iso_8859-10:1992" => "ISO-8859-10",
410
        "csisolatin6"              => "ISO-8859-10",
411
        //
412
        // Aliases for ISO-8859-15",
413
        //
414
        "iso_8859-15"              => "ISO-8859-15",
415
        "csisolatin9"              => "ISO-8859-15",
416
        "l9"                       => "ISO-8859-15",
417
        //
418
        // Aliases for ISO-IR-111",
419
        //
420
        "ecma-cyrillic"            => "ISO-IR-111",
421
        "csiso111ecmacyrillic"     => "ISO-IR-111",
422
        //
423
        // Aliases for ISO-2022-KR",
424
        //
425
        "csiso2022kr"              => "ISO-2022-KR",
426
        //
427
        // Aliases for VISCII",
428
        //
429
        "csviscii"                 => "VISCII",
430
        //
431
        // Aliases for x-euc-tw",
432
        //
433
        "zh_tw-euc"                => "x-euc-tw",
434
        //
435
        // Following names appears in unix nl_langinfo(CODESET)",
436
        // They can be compiled as platform specific if necessary",
437
        // DONT put things here if it does not look generic enough (like hp15CN)",
438
        //
439
        "iso88591"                 => "ISO-8859-1",
440
        "iso88592"                 => "ISO-8859-2",
441
        "iso88593"                 => "ISO-8859-3",
442
        "iso88594"                 => "ISO-8859-4",
443
        "iso88595"                 => "ISO-8859-5",
444
        "iso88596"                 => "ISO-8859-6",
445
        "iso88597"                 => "ISO-8859-7",
446
        "iso88598"                 => "ISO-8859-8",
447
        "iso88599"                 => "ISO-8859-9",
448
        "iso885910"                => "ISO-8859-10",
449
        "iso885911"                => "ISO-8859-11",
450
        "iso885912"                => "ISO-8859-12",
451
        "iso885913"                => "ISO-8859-13",
452
        "iso885914"                => "ISO-8859-14",
453
        "iso885915"                => "ISO-8859-15",
454
        "cp1250"                   => "windows-1250",
455
        "cp1251"                   => "windows-1251",
456
        "cp1252"                   => "windows-1252",
457
        "cp1253"                   => "windows-1253",
458
        "cp1254"                   => "windows-1254",
459
        "cp1255"                   => "windows-1255",
460
        "cp1256"                   => "windows-1256",
461
        "cp1257"                   => "windows-1257",
462
        "cp1258"                   => "windows-1258",
463
        "x-gbk"                    => "gbk",
464
        "windows-936"              => "gbk",
465
        "ansi-1251"                => "windows-1251",
466
    ];        
467
    
468
    /**
     * Resolve an encoding name to the charset registered for it in the alias table.
     *
     * The lookup is case-insensitive: the given name is lower-cased before it is used as a key.
     *
     * @param string|null $encoding Encoding name to resolve; null is looked up as an empty string
     * @param string|null $fallback Value returned when no alias is registered for $encoding
     *
     * @return string The mapped charset if one exists; otherwise $fallback when it is not null;
     *                otherwise the unchanged $encoding, or an empty string when $encoding is null too
     */
    public static function get($encoding, string $fallback = null): string {
        // Normalise once and reuse the key for both the existence check and the read.
        $key = strtolower($encoding ?? '');
        if (isset(self::$aliases[$key])) {
            return self::$aliases[$key];
        }

        if ($fallback !== null) {
            return $fallback;
        }

        // The declared return type is string: handing back a null $encoding would raise a TypeError.
        return $encoding ?? '';
    }
481
    
482
}
483