|
1
|
|
|
<?php |
|
2
|
|
|
/** |
|
3
|
|
|
* TEscCharsetConverter class file |
|
4
|
|
|
* |
|
5
|
|
|
* @author Brad Anderson <[email protected]> |
|
6
|
|
|
* @link https://github.com/pradosoft/prado |
|
7
|
|
|
* @license https://github.com/pradosoft/prado/blob/master/LICENSE |
|
8
|
|
|
*/ |
|
9
|
|
|
|
|
10
|
|
|
namespace Prado\Util; |
|
11
|
|
|
|
|
12
|
|
|
/** |
|
13
|
|
|
* TEscCharsetConverter class. |
|
14
|
|
|
* |
|
15
|
|
|
* TEscCharsetConverter is the ESC Charset Converter for converting between ESC |
|
16
|
|
|
* character set encodings and their iconv character encodings. |
|
17
|
|
|
* |
|
18
|
|
|
* Each Esc charset Encoding has 4 versions for G0, G1, G2, and G3. |
|
19
|
|
|
* |
|
20
|
|
|
* @author Brad Anderson <[email protected]> |
|
21
|
|
|
* @since 4.2.3 |
|
22
|
|
|
* @see https://en.wikipedia.org/wiki/ISO/IEC_2022 General structure of character encodings. |
|
23
|
|
|
* @see https://en.wikipedia.org/wiki/ISO/IEC_646 National standards for ASCII. |
|
24
|
|
|
* @see https://www.sljfaq.org/afaq/encodings.html Japanese encodings and character sets. |
|
25
|
|
|
* |
|
26
|
|
|
* These are not yet in iconv (as of April, 2023): |
|
27
|
|
|
* @todo missing ISO-5427. https://en.wikipedia.org/wiki/ISO_5427. (8 bit Cyrillic, 1979/1981) |
|
28
|
|
|
* @todo missing ČSN (Czech technical standard) 369103. https://en.wikipedia.org/wiki/KOI_character_encodings (also Cyrillic) |
|
29
|
|
|
*/ |
|
30
|
|
|
class TEscCharsetConverter
{
	/**
	 * Lookup table from escape sequences to character encoding names.
	 *
	 * Every key is the ESC byte (0x1B) followed by one or two intermediate
	 * bytes and a final byte.  Each encoding is listed four times, once per
	 * intermediate byte in its group (0x28-0x2B, 0x2C-0x2F, or 0x24 followed
	 * by 0x28-0x2B), so several keys share the same value.  The only entry
	 * that is listed once is the UTF-8 sequence ESC '%G'.
	 *
	 * The declaration order matters: {@see encodeEscapeCharset()} returns the
	 * first key whose value matches.
	 */
	public const ESC_CHAR_ENCODINGS_MAP = [
		"\x1B\x25\x47" => 'UTF-8', // ESC-'%G'

		"\x1B\x28\x40" => 'ASCII', "\x1B\x29\x40" => 'ASCII', "\x1B\x2A\x40" => 'ASCII', "\x1B\x2B\x40" => 'ASCII',
		"\x1B\x28\x41" => 'ASCII.en_GB', "\x1B\x29\x41" => 'ASCII.en_GB', "\x1B\x2A\x41" => 'ASCII.en_GB', "\x1B\x2B\x41" => 'ASCII.en_GB',
		"\x1B\x28\x42" => 'ASCII.en_US', "\x1B\x29\x42" => 'ASCII.en_US', "\x1B\x2A\x42" => 'ASCII.en_US', "\x1B\x2B\x42" => 'ASCII.en_US',
		"\x1B\x28\x43" => 'ASCII.fi', "\x1B\x29\x43" => 'ASCII.fi', "\x1B\x2A\x43" => 'ASCII.fi', "\x1B\x2B\x43" => 'ASCII.fi',
		"\x1B\x28\x44" => 'ASCII.sv', "\x1B\x29\x44" => 'ASCII.sv', "\x1B\x2A\x44" => 'ASCII.sv', "\x1B\x2B\x44" => 'ASCII.sv',
		"\x1B\x28\x45" => 'ASCII.no', "\x1B\x29\x45" => 'ASCII.no', "\x1B\x2A\x45" => 'ASCII.no', "\x1B\x2B\x45" => 'ASCII.no',
		"\x1B\x28\x46" => 'ASCII.no', "\x1B\x29\x46" => 'ASCII.no', "\x1B\x2A\x46" => 'ASCII.no', "\x1B\x2B\x46" => 'ASCII.no',
		"\x1B\x28\x47" => 'ASCII.se', "\x1B\x29\x47" => 'ASCII.se', "\x1B\x2A\x47" => 'ASCII.se', "\x1B\x2B\x47" => 'ASCII.se',
		"\x1B\x28\x49" => 'JIS_X0201', "\x1B\x29\x49" => 'JIS_X0201', "\x1B\x2A\x49" => 'JIS_X0201', "\x1B\x2B\x49" => 'JIS_X0201',
		"\x1B\x28\x4A" => 'JIS_X0201', "\x1B\x29\x4A" => 'JIS_X0201', "\x1B\x2A\x4A" => 'JIS_X0201', "\x1B\x2B\x4A" => 'JIS_X0201',
		"\x1B\x28\x4B" => 'ASCII.de', "\x1B\x29\x4B" => 'ASCII.de', "\x1B\x2A\x4B" => 'ASCII.de', "\x1B\x2B\x4B" => 'ASCII.de',
		"\x1B\x28\x4C" => 'ASCII.pt', "\x1B\x29\x4C" => 'ASCII.pt', "\x1B\x2A\x4C" => 'ASCII.pt', "\x1B\x2B\x4C" => 'ASCII.pt',
		"\x1B\x28\x4E" => 'ISO-5427', "\x1B\x29\x4E" => 'ISO-5427', "\x1B\x2A\x4E" => 'ISO-5427', "\x1B\x2B\x4E" => 'ISO-5427',
		"\x1B\x28\x54" => 'ISO646-CN', "\x1B\x29\x54" => 'ISO646-CN', "\x1B\x2A\x54" => 'ISO646-CN', "\x1B\x2B\x54" => 'ISO646-CN',
		"\x1B\x28\x59" => 'ASCII.it', "\x1B\x29\x59" => 'ASCII.it', "\x1B\x2A\x59" => 'ASCII.it', "\x1B\x2B\x59" => 'ASCII.it',
		"\x1B\x28\x5A" => 'ASCII.es', "\x1B\x29\x5A" => 'ASCII.es', "\x1B\x2A\x5A" => 'ASCII.es', "\x1B\x2B\x5A" => 'ASCII.es',
		"\x1B\x28\x5B" => 'ASCII.el', "\x1B\x29\x5B" => 'ASCII.el', "\x1B\x2A\x5B" => 'ASCII.el', "\x1B\x2B\x5B" => 'ASCII.el',
		"\x1B\x28\x60" => 'ASCII.no', "\x1B\x29\x60" => 'ASCII.no', "\x1B\x2A\x60" => 'ASCII.no', "\x1B\x2B\x60" => 'ASCII.no',
		"\x1B\x28\x66" => 'ASCII.fr', "\x1B\x29\x66" => 'ASCII.fr', "\x1B\x2A\x66" => 'ASCII.fr', "\x1B\x2B\x66" => 'ASCII.fr',
		"\x1B\x28\x67" => 'ASCII.pt', "\x1B\x29\x67" => 'ASCII.pt', "\x1B\x2A\x67" => 'ASCII.pt', "\x1B\x2B\x67" => 'ASCII.pt',
		"\x1B\x28\x68" => 'ASCII.es', "\x1B\x29\x68" => 'ASCII.es', "\x1B\x2A\x68" => 'ASCII.es', "\x1B\x2B\x68" => 'ASCII.es',
		"\x1B\x28\x69" => 'ASCII.hu', "\x1B\x29\x69" => 'ASCII.hu', "\x1B\x2A\x69" => 'ASCII.hu', "\x1B\x2B\x69" => 'ASCII.hu',
		"\x1B\x28\x77" => 'ASCII.fr_CA', "\x1B\x29\x77" => 'ASCII.fr_CA', "\x1B\x2A\x77" => 'ASCII.fr_CA', "\x1B\x2B\x77" => 'ASCII.fr_CA',
		"\x1B\x28\x78" => 'ASCII.fr_CA', "\x1B\x29\x78" => 'ASCII.fr_CA', "\x1B\x2A\x78" => 'ASCII.fr_CA', "\x1B\x2B\x78" => 'ASCII.fr_CA',
		"\x1B\x28\x7A" => 'ASCII.yu', "\x1B\x29\x7A" => 'ASCII.yu', "\x1B\x2A\x7A" => 'ASCII.yu', "\x1B\x2B\x7A" => 'ASCII.yu',

		"\x1B\x2C\x41" => 'ISO-8859-1', "\x1B\x2D\x41" => 'ISO-8859-1', "\x1B\x2E\x41" => 'ISO-8859-1', "\x1B\x2F\x41" => 'ISO-8859-1',
		"\x1B\x2C\x42" => 'ISO-8859-2', "\x1B\x2D\x42" => 'ISO-8859-2', "\x1B\x2E\x42" => 'ISO-8859-2', "\x1B\x2F\x42" => 'ISO-8859-2',
		"\x1B\x2C\x43" => 'ISO-8859-3', "\x1B\x2D\x43" => 'ISO-8859-3', "\x1B\x2E\x43" => 'ISO-8859-3', "\x1B\x2F\x43" => 'ISO-8859-3',
		"\x1B\x2C\x44" => 'ISO-8859-4', "\x1B\x2D\x44" => 'ISO-8859-4', "\x1B\x2E\x44" => 'ISO-8859-4', "\x1B\x2F\x44" => 'ISO-8859-4',
		"\x1B\x2C\x45" => 'ISO-8859-5', "\x1B\x2D\x45" => 'ISO-8859-5', "\x1B\x2E\x45" => 'ISO-8859-5', "\x1B\x2F\x45" => 'ISO-8859-5',
		"\x1B\x2C\x46" => 'ISO-8859-7', "\x1B\x2D\x46" => 'ISO-8859-7', "\x1B\x2E\x46" => 'ISO-8859-7', "\x1B\x2F\x46" => 'ISO-8859-7',
		"\x1B\x2C\x47" => 'ISO-8859-6', "\x1B\x2D\x47" => 'ISO-8859-6', "\x1B\x2E\x47" => 'ISO-8859-6', "\x1B\x2F\x47" => 'ISO-8859-6',
		"\x1B\x2C\x48" => 'ISO-8859-8', "\x1B\x2D\x48" => 'ISO-8859-8', "\x1B\x2E\x48" => 'ISO-8859-8', "\x1B\x2F\x48" => 'ISO-8859-8',
		"\x1B\x2C\x49" => 'CSN 369103', "\x1B\x2D\x49" => 'CSN 369103', "\x1B\x2E\x49" => 'CSN 369103', "\x1B\x2F\x49" => 'CSN 369103',

		"\x1B\x24\x28\x40" => 'JIS0208', "\x1B\x24\x29\x40" => 'JIS0208', "\x1B\x24\x2A\x40" => 'JIS0208', "\x1B\x24\x2B\x40" => 'JIS0208',
		"\x1B\x24\x28\x42" => 'JIS0208', "\x1B\x24\x29\x42" => 'JIS0208', "\x1B\x24\x2A\x42" => 'JIS0208', "\x1B\x24\x2B\x42" => 'JIS0208',
		"\x1B\x24\x28\x44" => 'JIS_X0212', "\x1B\x24\x29\x44" => 'JIS_X0212', "\x1B\x24\x2A\x44" => 'JIS_X0212', "\x1B\x24\x2B\x44" => 'JIS_X0212',
		"\x1B\x24\x28\x4F" => 'ISO-2022-JP-3', "\x1B\x24\x29\x4F" => 'ISO-2022-JP-3', "\x1B\x24\x2A\x4F" => 'ISO-2022-JP-3', "\x1B\x24\x2B\x4F" => 'ISO-2022-JP-3',
		"\x1B\x24\x28\x50" => 'ISO-2022-JP-3', "\x1B\x24\x29\x50" => 'ISO-2022-JP-3', "\x1B\x24\x2A\x50" => 'ISO-2022-JP-3', "\x1B\x24\x2B\x50" => 'ISO-2022-JP-3',
	];

	/**
	 * Convert an Escape Character Code Encoding to the iconv character
	 * encoding.
	 *
	 * Leading and trailing ESC bytes are trimmed, the remainder is split on
	 * ESC, and each piece is looked up (with ESC put back in front) as an
	 * exact key of {@see ESC_CHAR_ENCODINGS_MAP}.  The input may therefore
	 * hold several concatenated escape sequences, or a single sequence
	 * without its leading ESC; the first piece that is a known key wins.
	 *
	 * @param string $charset The ESC character code for conversion.
	 * @return ?string The decoded Character Encoding or null if not found.
	 */
	public static function decodeEscapeCharset(string $charset): ?string
	{
		$esc = "\x1B";
		foreach (explode($esc, trim($charset, $esc)) as $sequence) {
			// Map values are never null, so a null result means "no such key".
			$encoding = self::ESC_CHAR_ENCODINGS_MAP[$esc . $sequence] ?? null;
			if ($encoding !== null) {
				return $encoding;
			}
		}
		return null;
	}

	/**
	 * Convert an iconv character encoding name to its Escape Character Code
	 * Encoding.
	 *
	 * The name must match a value of {@see ESC_CHAR_ENCODINGS_MAP} exactly
	 * (the comparison is strict and case-sensitive).  Several escape
	 * sequences map to the same encoding; the one declared first in the map
	 * is returned.
	 *
	 * @param string $charset The iconv charset encoding to be encoded
	 * @return ?string The ESC character code representing the encoding
	 *   or null if not found.
	 */
	public static function encodeEscapeCharset(string $charset): ?string
	{
		// Strict search: never let PHP's loose comparison rules decide a match.
		$escEncoding = array_search($charset, self::ESC_CHAR_ENCODINGS_MAP, true);
		return $escEncoding === false ? null : $escEncoding;
	}
}
|
112
|
|
|
|