Passed
Push — main ( 9e04d2...06a3ca )
by smiley
02:18
created

Hanzi::convertEncoding()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 20
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 4
eloc 10
c 1
b 0
f 1
nc 4
nop 1
dl 0
loc 20
rs 9.9332
1
<?php
2
/**
3
 * Class Hanzi
4
 *
5
 * @created      19.11.2020
6
 * @author       smiley <[email protected]>
7
 * @copyright    2020 smiley
8
 * @license      MIT
9
 */
10
11
namespace chillerlan\QRCode\Data;
12
13
use chillerlan\QRCode\Common\{BitBuffer, Mode};
14
15
use function chr, implode, is_string, mb_convert_encoding, mb_detect_encoding,
16
	mb_detect_order, mb_internal_encoding, mb_strlen, ord, sprintf, strlen;
17
18
/**
19
 * Hanzi (simplified Chinese) mode, GBT18284-2000: double-byte characters from the GB2312/GB18030 character set
20
 *
21
 * Please note that this is not part of the QR Code specification and may not be supported by all readers (ZXing-based ones do).
22
 *
23
 * @see https://en.wikipedia.org/wiki/GB_2312
24
 * @see http://www.herongyang.com/GB2312/Introduction-of-GB2312.html
25
 * @see https://en.wikipedia.org/wiki/GBK_(character_encoding)#Encoding
26
 * @see https://gist.github.com/codemasher/91da33c44bfb48a81a6c1426bb8e4338
27
 * @see https://github.com/zxing/zxing/blob/dfb06fa33b17a9e68321be151c22846c7b78048f/core/src/main/java/com/google/zxing/qrcode/decoder/DecodedBitStreamParser.java#L172-L209
28
 * @see https://www.chinesestandard.net/PDF/English.aspx/GBT18284-2000
29
 */
30
final class Hanzi extends QRDataModeAbstract{

	/**
	 * The encoding name handed to the mb_* functions throughout this class
	 *
	 * GB2312, GB18030
	 */
	public const ENCODING = 'GB18030';

	/**
	 * @inheritDoc
	 */
	protected static int $datamode = Mode::HANZI;

	/**
	 * Counts the characters (not bytes) of the data, measured in self::ENCODING
	 *
	 * @inheritDoc
	 */
	protected function getCharCount():int{
		return mb_strlen($this->data, self::ENCODING);
	}

	/**
	 * Every character is written as one 13 bit value, see write()
	 *
	 * @inheritDoc
	 */
	public function getLengthInBits():int{
		return $this->getCharCount() * 13;
	}

	/**
	 * Detects the encoding of the given string and converts it to self::ENCODING if it is not already in that encoding
	 *
	 * The list of candidate encodings is passed to mb_detect_encoding() directly,
	 * so the process-wide mbstring detection order is not modified by this call.
	 *
	 * @inheritDoc
	 *
	 * @throws \chillerlan\QRCode\Data\QRCodeDataException if the encoding cannot be detected or the conversion fails
	 */
	public static function convertEncoding(string $string):string{
		$candidates = [mb_internal_encoding(), 'UTF-8', 'GB2312', 'GB18030', 'CP936', 'EUC-CN', 'HZ'];
		$detected   = mb_detect_encoding($string, $candidates, true);

		if($detected === false){
			throw new QRCodeDataException('mb_detect_encoding error');
		}

		// nothing to do if the input already is in the target encoding
		if($detected === self::ENCODING){
			return $string;
		}

		$string = mb_convert_encoding($string, self::ENCODING, $detected);

		if(!is_string($string)){
			throw new QRCodeDataException('mb_convert_encoding error');
		}

		return $string;
	}

	/**
	 * checks if a string qualifies as Hanzi/GB2312
	 *
	 * The string is converted via convertEncoding() first and is then examined in 2-byte steps:
	 * it needs to consist of an even number of bytes (at least 2) and each byte pair must lie
	 * within the ranges accepted below.
	 *
	 * @throws \chillerlan\QRCode\Data\QRCodeDataException passed through from convertEncoding()
	 */
	public static function validateString(string $string):bool{
		$string = self::convertEncoding($string);
		$len    = strlen($string);

		// empty input or an odd byte count cannot be a sequence of double-byte characters
		if($len < 2 || $len % 2 !== 0){
			return false;
		}

		for($i = 0; $i < $len; $i += 2){
			$byte1 = ord($string[$i]);
			$byte2 = ord($string[$i + 1]);

			// byte 1 unused ranges
			if($byte1 < 0xa1 || ($byte1 > 0xa9 && $byte1 < 0xb0) || $byte1 > 0xf7){
				return false;
			}

			// byte 2 unused ranges
			if($byte2 < 0xa1 || $byte2 > 0xfe){
				return false;
			}

		}

		return true;
	}

	/**
	 * Writes the mode indicator (4 bit), the character count and then one 13 bit value per 2-byte character
	 *
	 * @inheritDoc
	 *
	 * @throws \chillerlan\QRCode\Data\QRCodeDataException on an illegal character occurrence
	 */
	public function write(BitBuffer $bitBuffer, int $versionNumber):void{

		$bitBuffer
			->put(self::$datamode, 4)
			->put($this->getCharCount(), self::getLengthBits($versionNumber))
		;

		$len = strlen($this->data);

		for($i = 0; $i + 1 < $len; $i += 2){
			// combine the byte pair into one 16 bit value (ord() already yields 0-255)
			$c = (ord($this->data[$i]) << 8) | ord($this->data[$i + 1]);

			// subtract the offset of the range the value falls in
			if($c >= 0xa1a1 && $c <= 0xaafe){
				$c -= 0x0a1a1;
			}
			elseif($c >= 0xb0a1 && $c <= 0xfafe){
				$c -= 0x0a6a1;
			}
			else{
				throw new QRCodeDataException(sprintf('illegal char at %d [%d]', $i + 1, $c));
			}

			// high byte * 0x60 + low byte, stored in 13 bits
			$bitBuffer->put(((($c >> 8) & 0xff) * 0x060) + ($c & 0xff), 13);
		}

		// a single trailing byte is left over when the data length is odd
		if($i < $len){
			throw new QRCodeDataException(sprintf('illegal char at %d', $i + 1));
		}

	}

	/**
	 * Reads a Hanzi segment from the bit buffer and returns it converted to the internal (mbstring) encoding
	 *
	 * See specification GBT 18284-2000
	 *
	 * @throws \chillerlan\QRCode\Data\QRCodeDataException if not enough bits are available or the conversion fails
	 */
	public static function decodeSegment(BitBuffer $bitBuffer, int $versionNumber):string{
		$length = $bitBuffer->read(self::getLengthBits($versionNumber));

		if($bitBuffer->available() < $length * 13){
			throw new QRCodeDataException('not enough bits available');
		}

		// Each character will require 2 bytes. Read the characters as 2-byte pairs and decode as GB2312 afterwards
		$buffer = [];

		while($length > 0){
			// Each 13 bits encodes a 2-byte character
			$twoBytes = $bitBuffer->read(13);
			// the division yields a float for non-multiples of 0x60, which must be truncated
			// explicitly before shifting (implicit float to int conversion is deprecated as of PHP 8.1)
			$assembledTwoBytes = (((int)($twoBytes / 0x060)) << 8) | ($twoBytes % 0x060);

			$assembledTwoBytes += ($assembledTwoBytes < 0x00a00) // 0x003BF
				? 0x0a1a1  // In the 0xA1A1 to 0xAAFE range
				: 0x0a6a1; // In the 0xB0A1 to 0xFAFE range

			$buffer[] = chr(0xff & ($assembledTwoBytes >> 8));
			$buffer[] = chr(0xff & $assembledTwoBytes);

			$length--;
		}

		$decoded = mb_convert_encoding(implode($buffer), mb_internal_encoding(), self::ENCODING);

		// mb_convert_encoding() may return false, which must not leak out of a string-typed method
		if(!is_string($decoded)){
			throw new QRCodeDataException('mb_convert_encoding error');
		}

		return $decoded;
	}

}
180