Passed
Push — enums ( 8bfe56...7bf697 )
by Fabio
06:38 queued 01:28
created

TEscCharsetConverter   A

Complexity

Total Complexity 5

Size/Duplication

Total Lines 80
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 52
c 1
b 0
f 0
dl 0
loc 80
rs 10
wmc 5

2 Methods

Rating   Name   Duplication   Size   Complexity  
A encodeEscapeCharset() 0 6 2
A decodeEscapeCharset() 0 11 3
1
<?php
2
/**
3
 * TEscCharsetConverter class file
4
 *
5
 * @author Brad Anderson <[email protected]>
6
 * @link https://github.com/pradosoft/prado
7
 * @license https://github.com/pradosoft/prado/blob/master/LICENSE
8
 */
9
10
namespace Prado\Util;
11
12
/**
13
 * TEscCharsetConverter class.
14
 *
15
 * TEscCharsetConverter is the ESC Charset Converter for converting between ESC
16
 * character set encodings and their iconv character encodings.
17
 *
18
 * Each Esc charset Encoding has 4 versions for G0, G1, G2, and G3.
19
 *
20
 * @author Brad Anderson <[email protected]>
21
 * @since 4.2.3
22
 * @see https://en.wikipedia.org/wiki/ISO/IEC_2022 General structure of character encodings.
23
 * @see https://en.wikipedia.org/wiki/ISO/IEC_646 National standards for ASCII.
24
 * @see https://www.sljfaq.org/afaq/encodings.html Japanese encodings and character sets.
25
 *
26
 * These are not yet in iconv (as of April, 2023):
27
 * @todo missing ISO-5427. https://en.wikipedia.org/wiki/ISO_5427. (8 bit Cyrillic, 1979/1981)
28
 * @todo missing ČSN (Czech technical standard) 369103. https://en.wikipedia.org/wiki/KOI_character_encodings (also Cyrillic)
29
 */
30
class TEscCharsetConverter
{
	/**
	 * Lookup table from ESC character set designation sequences to character
	 * encoding names.
	 *
	 * Every key starts with the ESC byte (0x1B). Apart from the single UTF-8
	 * entry, each row lists the same encoding four times, once per
	 * intermediate byte:
	 *   - 0x28..0x2B followed by a final byte,
	 *   - 0x2C..0x2F followed by a final byte,
	 *   - 0x24 then 0x28..0x2B followed by a final byte.
	 *
	 * Several encoding names are shared by more than one final byte, so the
	 * reverse lookup is not unique; the declaration order decides which key
	 * {@see encodeEscapeCharset()} returns.
	 */
	public const ESC_CHAR_ENCODINGS_MAP = [
		"\x1B\x25\x47" => 'UTF-8', // ESC-'%G'

		"\x1B\x28\x40" => 'ASCII',		"\x1B\x29\x40" => 'ASCII',		"\x1B\x2A\x40" => 'ASCII',		"\x1B\x2B\x40" => 'ASCII',
		"\x1B\x28\x41" => 'ASCII.en_GB', "\x1B\x29\x41" => 'ASCII.en_GB', "\x1B\x2A\x41" => 'ASCII.en_GB', "\x1B\x2B\x41" => 'ASCII.en_GB',
		"\x1B\x28\x42" => 'ASCII.en_US', "\x1B\x29\x42" => 'ASCII.en_US', "\x1B\x2A\x42" => 'ASCII.en_US', "\x1B\x2B\x42" => 'ASCII.en_US',
		"\x1B\x28\x43" => 'ASCII.fi',	"\x1B\x29\x43" => 'ASCII.fi',	"\x1B\x2A\x43" => 'ASCII.fi',	"\x1B\x2B\x43" => 'ASCII.fi',
		"\x1B\x28\x44" => 'ASCII.sv',	"\x1B\x29\x44" => 'ASCII.sv',	"\x1B\x2A\x44" => 'ASCII.sv',	"\x1B\x2B\x44" => 'ASCII.sv',
		"\x1B\x28\x45" => 'ASCII.no',	"\x1B\x29\x45" => 'ASCII.no',	"\x1B\x2A\x45" => 'ASCII.no',	"\x1B\x2B\x45" => 'ASCII.no',
		"\x1B\x28\x46" => 'ASCII.no',	"\x1B\x29\x46" => 'ASCII.no',	"\x1B\x2A\x46" => 'ASCII.no',	"\x1B\x2B\x46" => 'ASCII.no',
		"\x1B\x28\x47" => 'ASCII.se',	"\x1B\x29\x47" => 'ASCII.se',	"\x1B\x2A\x47" => 'ASCII.se',	"\x1B\x2B\x47" => 'ASCII.se',
		"\x1B\x28\x49" => 'JIS_X0201',	"\x1B\x29\x49" => 'JIS_X0201',	"\x1B\x2A\x49" => 'JIS_X0201',	"\x1B\x2B\x49" => 'JIS_X0201',
		"\x1B\x28\x4A" => 'JIS_X0201',	"\x1B\x29\x4A" => 'JIS_X0201',	"\x1B\x2A\x4A" => 'JIS_X0201',	"\x1B\x2B\x4A" => 'JIS_X0201',
		"\x1B\x28\x4B" => 'ASCII.de',	"\x1B\x29\x4B" => 'ASCII.de',	"\x1B\x2A\x4B" => 'ASCII.de',	"\x1B\x2B\x4B" => 'ASCII.de',
		"\x1B\x28\x4C" => 'ASCII.pt',	"\x1B\x29\x4C" => 'ASCII.pt',	"\x1B\x2A\x4C" => 'ASCII.pt',	"\x1B\x2B\x4C" => 'ASCII.pt',
		"\x1B\x28\x4E" => 'ISO-5427',	"\x1B\x29\x4E" => 'ISO-5427',	"\x1B\x2A\x4E" => 'ISO-5427',	"\x1B\x2B\x4E" => 'ISO-5427',
		"\x1B\x28\x54" => 'ISO646-CN',	"\x1B\x29\x54" => 'ISO646-CN',	"\x1B\x2A\x54" => 'ISO646-CN',	"\x1B\x2B\x54" => 'ISO646-CN',
		"\x1B\x28\x59" => 'ASCII.it',	"\x1B\x29\x59" => 'ASCII.it',	"\x1B\x2A\x59" => 'ASCII.it',	"\x1B\x2B\x59" => 'ASCII.it',
		"\x1B\x28\x5A" => 'ASCII.es',	"\x1B\x29\x5A" => 'ASCII.es',	"\x1B\x2A\x5A" => 'ASCII.es',	"\x1B\x2B\x5A" => 'ASCII.es',
		"\x1B\x28\x5B" => 'ASCII.el',	"\x1B\x29\x5B" => 'ASCII.el',	"\x1B\x2A\x5B" => 'ASCII.el',	"\x1B\x2B\x5B" => 'ASCII.el',
		"\x1B\x28\x60" => 'ASCII.no',	"\x1B\x29\x60" => 'ASCII.no',	"\x1B\x2A\x60" => 'ASCII.no',	"\x1B\x2B\x60" => 'ASCII.no',
		"\x1B\x28\x66" => 'ASCII.fr',	"\x1B\x29\x66" => 'ASCII.fr',	"\x1B\x2A\x66" => 'ASCII.fr',	"\x1B\x2B\x66" => 'ASCII.fr',
		"\x1B\x28\x67" => 'ASCII.pt',	"\x1B\x29\x67" => 'ASCII.pt',	"\x1B\x2A\x67" => 'ASCII.pt',	"\x1B\x2B\x67" => 'ASCII.pt',
		"\x1B\x28\x68" => 'ASCII.es',	"\x1B\x29\x68" => 'ASCII.es',	"\x1B\x2A\x68" => 'ASCII.es',	"\x1B\x2B\x68" => 'ASCII.es',
		"\x1B\x28\x69" => 'ASCII.hu',	"\x1B\x29\x69" => 'ASCII.hu',	"\x1B\x2A\x69" => 'ASCII.hu',	"\x1B\x2B\x69" => 'ASCII.hu',
		"\x1B\x28\x77" => 'ASCII.fr_CA', "\x1B\x29\x77" => 'ASCII.fr_CA', "\x1B\x2A\x77" => 'ASCII.fr_CA', "\x1B\x2B\x77" => 'ASCII.fr_CA',
		"\x1B\x28\x78" => 'ASCII.fr_CA', "\x1B\x29\x78" => 'ASCII.fr_CA', "\x1B\x2A\x78" => 'ASCII.fr_CA', "\x1B\x2B\x78" => 'ASCII.fr_CA',
		"\x1B\x28\x7A" => 'ASCII.yu',	"\x1B\x29\x7A" => 'ASCII.yu',	"\x1B\x2A\x7A" => 'ASCII.yu',	"\x1B\x2B\x7A" => 'ASCII.yu',

		"\x1B\x2C\x41" => 'ISO-8859-1',	"\x1B\x2D\x41" => 'ISO-8859-1',	"\x1B\x2E\x41" => 'ISO-8859-1',	"\x1B\x2F\x41" => 'ISO-8859-1',
		"\x1B\x2C\x42" => 'ISO-8859-2',	"\x1B\x2D\x42" => 'ISO-8859-2',	"\x1B\x2E\x42" => 'ISO-8859-2',	"\x1B\x2F\x42" => 'ISO-8859-2',
		"\x1B\x2C\x43" => 'ISO-8859-3',	"\x1B\x2D\x43" => 'ISO-8859-3',	"\x1B\x2E\x43" => 'ISO-8859-3',	"\x1B\x2F\x43" => 'ISO-8859-3',
		"\x1B\x2C\x44" => 'ISO-8859-4',	"\x1B\x2D\x44" => 'ISO-8859-4',	"\x1B\x2E\x44" => 'ISO-8859-4',	"\x1B\x2F\x44" => 'ISO-8859-4',
		"\x1B\x2C\x45" => 'ISO-8859-5',	"\x1B\x2D\x45" => 'ISO-8859-5',	"\x1B\x2E\x45" => 'ISO-8859-5',	"\x1B\x2F\x45" => 'ISO-8859-5',
		"\x1B\x2C\x46" => 'ISO-8859-7',	"\x1B\x2D\x46" => 'ISO-8859-7',	"\x1B\x2E\x46" => 'ISO-8859-7',	"\x1B\x2F\x46" => 'ISO-8859-7',
		"\x1B\x2C\x47" => 'ISO-8859-6',	"\x1B\x2D\x47" => 'ISO-8859-6',	"\x1B\x2E\x47" => 'ISO-8859-6',	"\x1B\x2F\x47" => 'ISO-8859-6',
		"\x1B\x2C\x48" => 'ISO-8859-8',	"\x1B\x2D\x48" => 'ISO-8859-8',	"\x1B\x2E\x48" => 'ISO-8859-8',	"\x1B\x2F\x48" => 'ISO-8859-8',
		"\x1B\x2C\x49" => 'CSN 369103',	"\x1B\x2D\x49" => 'CSN 369103',	"\x1B\x2E\x49" => 'CSN 369103',	"\x1B\x2F\x49" => 'CSN 369103',

		"\x1B\x24\x28\x40" => 'JIS0208',	"\x1B\x24\x29\x40" => 'JIS0208',	"\x1B\x24\x2A\x40" => 'JIS0208',	"\x1B\x24\x2B\x40" => 'JIS0208',
		"\x1B\x24\x28\x42" => 'JIS0208',	"\x1B\x24\x29\x42" => 'JIS0208',	"\x1B\x24\x2A\x42" => 'JIS0208',	"\x1B\x24\x2B\x42" => 'JIS0208',
		"\x1B\x24\x28\x44" => 'JIS_X0212',	"\x1B\x24\x29\x44" => 'JIS_X0212',	"\x1B\x24\x2A\x44" => 'JIS_X0212',	"\x1B\x24\x2B\x44" => 'JIS_X0212',
		"\x1B\x24\x28\x4F" => 'ISO-2022-JP-3',	"\x1B\x24\x29\x4F" => 'ISO-2022-JP-3',	"\x1B\x24\x2A\x4F" => 'ISO-2022-JP-3',	"\x1B\x24\x2B\x4F" => 'ISO-2022-JP-3',
		"\x1B\x24\x28\x50" => 'ISO-2022-JP-3',	"\x1B\x24\x29\x50" => 'ISO-2022-JP-3',	"\x1B\x24\x2A\x50" => 'ISO-2022-JP-3',	"\x1B\x24\x2B\x50" => 'ISO-2022-JP-3',
	];

	/**
	 * Convert an ESC character set designation into its character encoding
	 * name.
	 *
	 * The input may hold several ESC-separated sequences. Leading and trailing
	 * ESC bytes are stripped, the remainder is split on ESC, and each piece is
	 * looked up (with ESC re-attached) in {@see ESC_CHAR_ENCODINGS_MAP}. The
	 * first piece that matches wins. A piece must match a map key exactly, so
	 * a sequence followed by other text is not recognised.
	 *
	 * @param string $charset The ESC character code(s) to convert.
	 * @return ?string The matching character encoding name, or null if no
	 *   sequence in the input is known.
	 */
	public static function decodeEscapeCharset(string $charset): ?string
	{
		$esc = "\x1B";
		foreach (explode($esc, trim($charset, $esc)) as $sequence) {
			// No map value is null, so `??` is equivalent to a key-existence check.
			$encoding = self::ESC_CHAR_ENCODINGS_MAP[$esc . $sequence] ?? null;
			if ($encoding !== null) {
				return $encoding;
			}
		}
		return null;
	}

	/**
	 * Convert a character encoding name into its ESC character set
	 * designation.
	 *
	 * The name must match a value of {@see ESC_CHAR_ENCODINGS_MAP} exactly
	 * (case-sensitive). When several sequences map to the same name, the one
	 * declared first in the map is returned.
	 *
	 * @param string $charset The character encoding name to be encoded.
	 * @return ?string The ESC character code representing the encoding,
	 *   or null if not found.
	 */
	public static function encodeEscapeCharset(string $charset): ?string
	{
		// Strict search: never let PHP's loose comparison juggle numeric-looking names.
		$escEncoding = array_search($charset, self::ESC_CHAR_ENCODINGS_MAP, true);
		return $escEncoding === false ? null : $escEncoding;
	}
}
112