Completed
Push — master ( 74fe4d...510163 )
by Malte
04:56 queued 01:44
created

EncodingAliases   A

Complexity

Total Complexity 2

Size/Duplication

Total Lines 457
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
dl 0
loc 457
rs 10
c 0
b 0
f 0
wmc 2

1 Method

Rating   Name   Duplication   Size   Complexity  
A get() 0 5 2
1
<?php
2
/*
3
* File:     EncodingAliases.php
4
* Category: -
5
* Author:   S. Todorov (https://github.com/todorowww)
6
* Created:  23.04.18 14:16
7
* Updated:  -
8
*
9
* Description:
10
*  Contains email encoding aliases that can occur when fetching emails. These can sometimes break iconv().
11
*  This file attempts to correct this by using a list of aliases and their mappings to supported iconv() encodings
12
*/
13
14
namespace Webklex\IMAP;
15
16
/**
17
 * Class EncodingAliases
18
 *
19
 * @package Webklex\IMAP
20
 */
21
class EncodingAliases {
22
   
23
    /**
24
     * Contains email encoding mappings
25
     * @var array
26
     */
27
    private static $aliases = [
28
        /*
29
        |--------------------------------------------------------------------------
30
        | Email encoding aliases
31
        |--------------------------------------------------------------------------
32
        |
33
        | Email encoding aliases used to convert to iconv supported charsets
34
        |
35
        |
36
        | This Source Code Form is subject to the terms of the Mozilla Public
37
        | License, v. 2.0. If a copy of the MPL was not distributed with this
38
        | file, You can obtain one at http://mozilla.org/MPL/2.0/.
39
        |
40
        | This Original Code has been modified by IBM Corporation.
41
        | Modifications made by IBM described herein are
42
        | Copyright (c) International Business Machines
43
        | Corporation, 1999
44
        |
45
        | Modifications to Mozilla code or documentation
46
        | identified per MPL Section 3.3
47
        |
48
        | Date         Modified by     Description of modification
49
        | 12/09/1999   IBM Corp.       Support for IBM codepages - 850,852,855,857,862,864
50
        |
51
        | Rule of this file:
52
        | 1. key should always be in lower case ascii so we can do case insensitive
53
        |    comparison in the code faster.
54
        | 2. value should be the one used in unicode converter
55
        |
56
        | 3. If the charset is not used for document charset, but font charset
57
        |    (e.g. XLFD charset- such as JIS x0201, JIS x0208), don't put here
58
        |
59
        */
60
        "ascii"                    => "us-ascii",
61
        "us-ascii"                 => "us-ascii",
62
        "ansi_x3.4-1968"           => "us-ascii",
63
        "646"                      => "us-ascii",
64
        "iso-8859-1"               => "ISO-8859-1",
65
        "iso-8859-2"               => "ISO-8859-2",
66
        "iso-8859-3"               => "ISO-8859-3",
67
        "iso-8859-4"               => "ISO-8859-4",
68
        "iso-8859-5"               => "ISO-8859-5",
69
        "iso-8859-6"               => "ISO-8859-6",
70
        "iso-8859-6-i"             => "ISO-8859-6-I",
71
        "iso-8859-6-e"             => "ISO-8859-6-E",
72
        "iso-8859-7"               => "ISO-8859-7",
73
        "iso-8859-8"               => "ISO-8859-8",
74
        "iso-8859-8-i"             => "ISO-8859-8-I",
75
        "iso-8859-8-e"             => "ISO-8859-8-E",
76
        "iso-8859-9"               => "ISO-8859-9",
77
        "iso-8859-10"              => "ISO-8859-10",
78
        "iso-8859-11"              => "ISO-8859-11",
79
        "iso-8859-13"              => "ISO-8859-13",
80
        "iso-8859-14"              => "ISO-8859-14",
81
        "iso-8859-15"              => "ISO-8859-15",
82
        "iso-8859-16"              => "ISO-8859-16",
83
        "iso-ir-111"               => "ISO-IR-111",
84
        "iso-2022-cn"              => "ISO-2022-CN",
85
        "iso-2022-cn-ext"          => "ISO-2022-CN",
86
        "iso-2022-kr"              => "ISO-2022-KR",
87
        "iso-2022-jp"              => "ISO-2022-JP",
88
        "utf-16be"                 => "UTF-16BE",
89
        "utf-16le"                 => "UTF-16LE",
90
        "utf-16"                   => "UTF-16",
91
        "windows-1250"             => "windows-1250",
92
        "windows-1251"             => "windows-1251",
93
        "windows-1252"             => "windows-1252",
94
        "windows-1253"             => "windows-1253",
95
        "windows-1254"             => "windows-1254",
96
        "windows-1255"             => "windows-1255",
97
        "windows-1256"             => "windows-1256",
98
        "windows-1257"             => "windows-1257",
99
        "windows-1258"             => "windows-1258",
100
        "ibm866"                   => "IBM866",
101
        "ibm850"                   => "IBM850",
102
        "ibm852"                   => "IBM852",
103
        "ibm855"                   => "IBM855",
104
        "ibm857"                   => "IBM857",
105
        "ibm862"                   => "IBM862",
106
        "ibm864"                   => "IBM864",
107
        "utf-8"                    => "UTF-8",
108
        "utf-7"                    => "UTF-7",
109
        "shift_jis"                => "Shift_JIS",
110
        "big5"                     => "Big5",
111
        "euc-jp"                   => "EUC-JP",
112
        "euc-kr"                   => "EUC-KR",
113
        "gb2312"                   => "GB2312",
114
        "gb18030"                  => "gb18030",
115
        "viscii"                   => "VISCII",
116
        "koi8-r"                   => "KOI8-R",
117
        "koi8_r"                   => "KOI8-R",
118
        "cskoi8r"                  => "KOI8-R",
119
        "koi"                      => "KOI8-R",
120
        "koi8"                     => "KOI8-R",
121
        "koi8-u"                   => "KOI8-U",
122
        "tis-620"                  => "TIS-620",
123
        "t.61-8bit"                => "T.61-8bit",
124
        "hz-gb-2312"               => "HZ-GB-2312",
125
        "big5-hkscs"               => "Big5-HKSCS",
126
        "gbk"                      => "gbk",
127
        "cns11643"                 => "x-euc-tw",
128
        //
129
        // Aliases for ISO-8859-1 (the hyphen-less "iso8859-N" spellings of the other parts are listed here too)
130
        //
131
        "latin1"                   => "ISO-8859-1",
132
        "iso_8859-1"               => "ISO-8859-1",
133
        "iso8859-1"                => "ISO-8859-1",
134
        "iso8859-2"                => "ISO-8859-2",
135
        "iso8859-3"                => "ISO-8859-3",
136
        "iso8859-4"                => "ISO-8859-4",
137
        "iso8859-5"                => "ISO-8859-5",
138
        "iso8859-6"                => "ISO-8859-6",
139
        "iso8859-7"                => "ISO-8859-7",
140
        "iso8859-8"                => "ISO-8859-8",
141
        "iso8859-9"                => "ISO-8859-9",
142
        "iso8859-10"               => "ISO-8859-10",
143
        "iso8859-11"               => "ISO-8859-11",
144
        "iso8859-13"               => "ISO-8859-13",
145
        "iso8859-14"               => "ISO-8859-14",
146
        "iso8859-15"               => "ISO-8859-15",
147
        "iso_8859-1:1987"          => "ISO-8859-1",
148
        "iso-ir-100"               => "ISO-8859-1",
149
        "l1"                       => "ISO-8859-1",
150
        "ibm819"                   => "ISO-8859-1",
151
        "cp819"                    => "ISO-8859-1",
152
        "csisolatin1"              => "ISO-8859-1",
153
        //
154
        // Aliases for ISO-8859-2
155
        //
156
        "latin2"                   => "ISO-8859-2",
157
        "iso_8859-2"               => "ISO-8859-2",
158
        "iso_8859-2:1987"          => "ISO-8859-2",
159
        "iso-ir-101"               => "ISO-8859-2",
160
        "l2"                       => "ISO-8859-2",
161
        "csisolatin2"              => "ISO-8859-2",
162
        //
163
        // Aliases for ISO-8859-3
164
        //
165
        "latin3"                   => "ISO-8859-3",
166
        "iso_8859-3"               => "ISO-8859-3",
167
        "iso_8859-3:1988"          => "ISO-8859-3",
168
        "iso-ir-109"               => "ISO-8859-3",
169
        "l3"                       => "ISO-8859-3",
170
        "csisolatin3"              => "ISO-8859-3",
171
        //
172
        // Aliases for ISO-8859-4
173
        //
174
        "latin4"                   => "ISO-8859-4",
175
        "iso_8859-4"               => "ISO-8859-4",
176
        "iso_8859-4:1988"          => "ISO-8859-4",
177
        "iso-ir-110"               => "ISO-8859-4",
178
        "l4"                       => "ISO-8859-4",
179
        "csisolatin4"              => "ISO-8859-4",
180
        //
181
        // Aliases for ISO-8859-5
182
        //
183
        "cyrillic"                 => "ISO-8859-5",
184
        "iso_8859-5"               => "ISO-8859-5",
185
        "iso_8859-5:1988"          => "ISO-8859-5",
186
        "iso-ir-144"               => "ISO-8859-5",
187
        "csisolatincyrillic"       => "ISO-8859-5",
188
        //
189
        // Aliases for ISO-8859-6
190
        //
191
        "arabic"                   => "ISO-8859-6",
192
        "iso_8859-6"               => "ISO-8859-6",
193
        "iso_8859-6:1987"          => "ISO-8859-6",
194
        "iso-ir-127"               => "ISO-8859-6",
195
        "ecma-114"                 => "ISO-8859-6",
196
        "asmo-708"                 => "ISO-8859-6",
197
        "csisolatinarabic"         => "ISO-8859-6",
198
        //
199
        // Aliases for ISO-8859-6-I
200
        //
201
        "csiso88596i"              => "ISO-8859-6-I",
202
        //
203
        // Aliases for ISO-8859-6-E",
204
        //
205
        "csiso88596e"              => "ISO-8859-6-E",
206
        //
207
        // Aliases for ISO-8859-7",
208
        //
209
        "greek"                    => "ISO-8859-7",
210
        "greek8"                   => "ISO-8859-7",
211
        "sun_eu_greek"             => "ISO-8859-7",
212
        "iso_8859-7"               => "ISO-8859-7",
213
        "iso_8859-7:1987"          => "ISO-8859-7",
214
        "iso-ir-126"               => "ISO-8859-7",
215
        "elot_928"                 => "ISO-8859-7",
216
        "ecma-118"                 => "ISO-8859-7",
217
        "csisolatingreek"          => "ISO-8859-7",
218
        //
219
        // Aliases for ISO-8859-8",
220
        //
221
        "hebrew"                   => "ISO-8859-8",
222
        "iso_8859-8"               => "ISO-8859-8",
223
        "visual"                   => "ISO-8859-8",
224
        "iso_8859-8:1988"          => "ISO-8859-8",
225
        "iso-ir-138"               => "ISO-8859-8",
226
        "csisolatinhebrew"         => "ISO-8859-8",
227
        //
228
        // Aliases for ISO-8859-8-I",
229
        //
230
        "csiso88598i"              => "ISO-8859-8-I",
231
        "iso-8859-8i"              => "ISO-8859-8-I",
232
        "logical"                  => "ISO-8859-8-I",
233
        //
234
        // Aliases for ISO-8859-8-E",
235
        //
236
        "csiso88598e"              => "ISO-8859-8-E",
237
        //
238
        // Aliases for ISO-8859-9",
239
        //
240
        "latin5"                   => "ISO-8859-9",
241
        "iso_8859-9"               => "ISO-8859-9",
242
        "iso_8859-9:1989"          => "ISO-8859-9",
243
        "iso-ir-148"               => "ISO-8859-9",
244
        "l5"                       => "ISO-8859-9",
245
        "csisolatin5"              => "ISO-8859-9",
246
        //
247
        // Aliases for UTF-8",
248
        //
249
        "unicode-1-1-utf-8"        => "UTF-8",
250
        // nl_langinfo(CODESET) in HP/UX returns 'utf8' under UTF-8 locales
251
        "utf8"                     => "UTF-8",
252
        //
253
        // Aliases for Shift_JIS",
254
        //
255
        "x-sjis"                   => "Shift_JIS",
256
        "shift-jis"                => "Shift_JIS",
257
        "ms_kanji"                 => "Shift_JIS",
258
        "csshiftjis"               => "Shift_JIS",
259
        "windows-31j"              => "Shift_JIS",
260
        "cp932"                    => "Shift_JIS",
261
        "sjis"                     => "Shift_JIS",
262
        //
263
        // Aliases for EUC_JP",
264
        //
265
        "cseucpkdfmtjapanese"      => "EUC-JP",
266
        "x-euc-jp"                 => "EUC-JP",
267
        //
268
        // Aliases for ISO-2022-JP",
269
        //
270
        "csiso2022jp"              => "ISO-2022-JP",
271
        // The following are not really aliases for ISO-2022-JP, but they share the same decoder
272
        "iso-2022-jp-2"            => "ISO-2022-JP",
273
        "csiso2022jp2"             => "ISO-2022-JP",
274
        //
275
        // Aliases for Big5",
276
        //
277
        "csbig5"                   => "Big5",
278
        "cn-big5"                  => "Big5",
279
        // x-x-big5 is not really an alias for Big5; it is added only for MS FrontPage
280
        "x-x-big5"                 => "Big5",
281
        // Sun Solaris",
282
        "zh_tw-big5"               => "Big5",
283
        //
284
        // Aliases for EUC-KR",
285
        //
286
        "cseuckr"                  => "EUC-KR",
287
        "ks_c_5601-1987"           => "EUC-KR",
288
        "iso-ir-149"               => "EUC-KR",
289
        "ks_c_5601-1989"           => "EUC-KR",
290
        "ksc_5601"                 => "EUC-KR",
291
        "ksc5601"                  => "EUC-KR",
292
        "korean"                   => "EUC-KR",
293
        "csksc56011987"            => "EUC-KR",
294
        "5601"                     => "EUC-KR",
295
        "windows-949"              => "EUC-KR",
296
        //
297
        // Aliases for GB2312",
298
        //
299
        // The following are not really aliases for GB2312; they are added only for MS FrontPage
300
        "gb_2312-80"               => "GB2312",
301
        "iso-ir-58"                => "GB2312",
302
        "chinese"                  => "GB2312",
303
        "csiso58gb231280"          => "GB2312",
304
        "csgb2312"                 => "GB2312",
305
        "zh_cn.euc"                => "GB2312",
306
        // Sun Solaris",
307
        "gb_2312"                  => "GB2312",
308
        //
309
        // Aliases for windows-125x ",
310
        //
311
        "x-cp1250"                 => "windows-1250",
312
        "x-cp1251"                 => "windows-1251",
313
        "x-cp1252"                 => "windows-1252",
314
        "x-cp1253"                 => "windows-1253",
315
        "x-cp1254"                 => "windows-1254",
316
        "x-cp1255"                 => "windows-1255",
317
        "x-cp1256"                 => "windows-1256",
318
        "x-cp1257"                 => "windows-1257",
319
        "x-cp1258"                 => "windows-1258",
320
        //
321
        // Aliases for windows-874 ",
322
        //
323
        "windows-874"              => "windows-874",
324
        "ibm874"                   => "windows-874",
325
        "dos-874"                  => "windows-874",
326
        //
327
        // Aliases for macintosh",
328
        //
329
        "macintosh"                => "macintosh",
330
        "x-mac-roman"              => "macintosh",
331
        "mac"                      => "macintosh",
332
        "csmacintosh"              => "macintosh",
333
        //
334
        // Aliases for IBM866",
335
        //
336
        "cp866"                    => "IBM866",
337
        "cp-866"                   => "IBM866",
338
        "866"                      => "IBM866",
339
        "csibm866"                 => "IBM866",
340
        //
341
        // Aliases for IBM850",
342
        //
343
        "cp850"                    => "IBM850",
344
        "850"                      => "IBM850",
345
        "csibm850"                 => "IBM850",
346
        //
347
        // Aliases for IBM852",
348
        //
349
        "cp852"                    => "IBM852",
350
        "852"                      => "IBM852",
351
        "csibm852"                 => "IBM852",
352
        //
353
        // Aliases for IBM855",
354
        //
355
        "cp855"                    => "IBM855",
356
        "855"                      => "IBM855",
357
        "csibm855"                 => "IBM855",
358
        //
359
        // Aliases for IBM857",
360
        //
361
        "cp857"                    => "IBM857",
362
        "857"                      => "IBM857",
363
        "csibm857"                 => "IBM857",
364
        //
365
        // Aliases for IBM862",
366
        //
367
        "cp862"                    => "IBM862",
368
        "862"                      => "IBM862",
369
        "csibm862"                 => "IBM862",
370
        //
371
        // Aliases for IBM864",
372
        //
373
        "cp864"                    => "IBM864",
374
        "864"                      => "IBM864",
375
        "csibm864"                 => "IBM864",
376
        "ibm-864"                  => "IBM864",
377
        //
378
        // Aliases for T.61-8bit",
379
        //
380
        "t.61"                     => "T.61-8bit",
381
        "iso-ir-103"               => "T.61-8bit",
382
        "csiso103t618bit"          => "T.61-8bit",
383
        //
384
        // Aliases for UTF-7",
385
        //
386
        "x-unicode-2-0-utf-7"      => "UTF-7",
387
        "unicode-2-0-utf-7"        => "UTF-7",
388
        "unicode-1-1-utf-7"        => "UTF-7",
389
        "csunicode11utf7"          => "UTF-7",
390
        //
391
        // Aliases for ISO-10646-UCS-2",
392
        //
393
        "csunicode"                => "UTF-16BE",
394
        "csunicode11"              => "UTF-16BE",
395
        "iso-10646-ucs-basic"      => "UTF-16BE",
396
        "csunicodeascii"           => "UTF-16BE",
397
        "iso-10646-unicode-latin1" => "UTF-16BE",
398
        "csunicodelatin1"          => "UTF-16BE",
399
        "iso-10646"                => "UTF-16BE",
400
        "iso-10646-j-1"            => "UTF-16BE",
401
        //
402
        // Aliases for ISO-8859-10",
403
        //
404
        "latin6"                   => "ISO-8859-10",
405
        "iso-ir-157"               => "ISO-8859-10",
406
        "l6"                       => "ISO-8859-10",
407
        // Currently .properties cannot handle : in key
408
        // "iso_8859-10:1992" => "ISO-8859-10",
409
        "csisolatin6"              => "ISO-8859-10",
410
        //
411
        // Aliases for ISO-8859-15",
412
        //
413
        "iso_8859-15"              => "ISO-8859-15",
414
        "csisolatin9"              => "ISO-8859-15",
415
        "l9"                       => "ISO-8859-15",
416
        //
417
        // Aliases for ISO-IR-111",
418
        //
419
        "ecma-cyrillic"            => "ISO-IR-111",
420
        "csiso111ecmacyrillic"     => "ISO-IR-111",
421
        //
422
        // Aliases for ISO-2022-KR",
423
        //
424
        "csiso2022kr"              => "ISO-2022-KR",
425
        //
426
        // Aliases for VISCII",
427
        //
428
        "csviscii"                 => "VISCII",
429
        //
430
        // Aliases for x-euc-tw",
431
        //
432
        "zh_tw-euc"                => "x-euc-tw",
433
        //
434
        // The following names appear in Unix nl_langinfo(CODESET)
435
        // They can be compiled as platform specific if necessary",
436
        // DON'T put things here if they do not look generic enough (like hp15CN)
437
        //
438
        "iso88591"                 => "ISO-8859-1",
439
        "iso88592"                 => "ISO-8859-2",
440
        "iso88593"                 => "ISO-8859-3",
441
        "iso88594"                 => "ISO-8859-4",
442
        "iso88595"                 => "ISO-8859-5",
443
        "iso88596"                 => "ISO-8859-6",
444
        "iso88597"                 => "ISO-8859-7",
445
        "iso88598"                 => "ISO-8859-8",
446
        "iso88599"                 => "ISO-8859-9",
447
        "iso885910"                => "ISO-8859-10",
448
        "iso885911"                => "ISO-8859-11",
449
        "iso885912"                => "ISO-8859-12",
450
        "iso885913"                => "ISO-8859-13",
451
        "iso885914"                => "ISO-8859-14",
452
        "iso885915"                => "ISO-8859-15",
453
        "cp1250"                   => "windows-1250",
454
        "cp1251"                   => "windows-1251",
455
        "cp1252"                   => "windows-1252",
456
        "cp1253"                   => "windows-1253",
457
        "cp1254"                   => "windows-1254",
458
        "cp1255"                   => "windows-1255",
459
        "cp1256"                   => "windows-1256",
460
        "cp1257"                   => "windows-1257",
461
        "cp1258"                   => "windows-1258",
462
        "x-gbk"                    => "gbk",
463
        "windows-936"              => "gbk",
464
        "ansi-1251"                => "windows-1251",
465
    ];        
466
    
467
    /**
     * Resolve an encoding name to the charset name stored in the alias table.
     *
     * The lookup key is the strtolower()'d $encoding, so the casing of the
     * given name does not matter for matching. When the table has no entry for
     * that key, $encoding is handed back exactly as it was passed in.
     *
     * @param string $encoding Encoding name to look up
     * @return string Mapped charset name, or $encoding itself when no alias is known
     */
    public static function get($encoding) {
        $key = strtolower($encoding);

        if (!isset(self::$aliases[$key])) {
            // Unknown name: pass it through unchanged
            return $encoding;
        }

        return self::$aliases[$key];
    }
480
    
481
}
482