Hanzi::getCharCount()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 2
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 1
eloc 1
c 1
b 0
f 1
nc 1
nop 0
dl 0
loc 2
rs 10
1
<?php
2
/**
3
 * Class Hanzi
4
 *
5
 * @created      19.11.2020
6
 * @author       smiley <[email protected]>
7
 * @copyright    2020 smiley
8
 * @license      MIT
9
 */
10
11
namespace chillerlan\QRCode\Data;
12
13
use chillerlan\QRCode\Common\{BitBuffer, Mode};
14
15
use Throwable;
16
use function chr, implode, is_string, mb_convert_encoding, mb_detect_encoding,
17
	mb_detect_order, mb_internal_encoding, mb_strlen, ord, sprintf, strlen;
18
19
/**
20
 * Hanzi (simplified Chinese) mode, GBT18284-2000: 13-bit double-byte characters from the GB2312/GB18030 character set
21
 *
22
 * Please note that this is not part of the QR Code specification and may not be supported by all readers (ZXing-based ones do).
23
 *
24
 * @see https://en.wikipedia.org/wiki/GB_2312
25
 * @see http://www.herongyang.com/GB2312/Introduction-of-GB2312.html
26
 * @see https://en.wikipedia.org/wiki/GBK_(character_encoding)#Encoding
27
 * @see https://gist.github.com/codemasher/91da33c44bfb48a81a6c1426bb8e4338
28
 * @see https://github.com/zxing/zxing/blob/dfb06fa33b17a9e68321be151c22846c7b78048f/core/src/main/java/com/google/zxing/qrcode/decoder/DecodedBitStreamParser.java#L172-L209
29
 * @see https://www.chinesestandard.net/PDF/English.aspx/GBT18284-2000
30
 */
31
final class Hanzi extends QRDataModeAbstract{

	/**
	 * The character encoding in which the segment data is held and measured
	 *
	 * possible values: GB2312, GB18030
	 *
	 * @var string
	 */
	public const ENCODING = 'GB18030';

	/**
	 * The 4-bit subset indicator that is written right after the mode indicator
	 * (decodeSegment() rejects any other value)
	 *
	 * @todo: other subsets???
	 *
	 * @var int
	 */
	public const GB2312_SUBSET = 0b0001;

	/**
	 * @inheritDoc
	 */
	public const DATAMODE = Mode::HANZI;

	/**
	 * Returns the number of characters (not bytes) of the data, counted in self::ENCODING
	 *
	 * @inheritDoc
	 */
	protected function getCharCount():int{
		return mb_strlen($this->data, self::ENCODING);
	}

	/**
	 * Returns the payload length in bits: each character is written as one 13-bit value
	 *
	 * @inheritDoc
	 */
	public function getLengthInBits():int{
		return ($this->getCharCount() * 13);
	}

	/**
	 * Detects the encoding of the given string and converts it to self::ENCODING if necessary
	 *
	 * @inheritDoc
	 *
	 * @throws \chillerlan\QRCode\Data\QRCodeDataException if the encoding cannot be detected or the conversion fails
	 */
	public static function convertEncoding(string $string):string{

		// The candidate list is handed to mb_detect_encoding() directly instead of being installed
		// via mb_detect_order(): the latter would change the detection order for the whole process.
		$candidates = [mb_internal_encoding(), 'UTF-8', 'GB2312', 'GB18030', 'CP936', 'EUC-CN', 'HZ'];
		$detected   = mb_detect_encoding($string, $candidates, true);

		if($detected === false){
			throw new QRCodeDataException('mb_detect_encoding error');
		}

		// already in the target encoding, nothing to do
		if($detected === self::ENCODING){
			return $string;
		}

		$string = mb_convert_encoding($string, self::ENCODING, $detected);

		if(!is_string($string)){
			throw new QRCodeDataException('mb_convert_encoding error');
		}

		return $string;
	}

	/**
	 * checks if a string qualifies as Hanzi/GB2312
	 *
	 * The string is converted to self::ENCODING first (any error during conversion yields false).
	 * It qualifies if it is a non-empty sequence of byte pairs where the first byte is in the range
	 * 0xa1-0xa9 or 0xb0-0xf7 and the second byte is in the range 0xa1-0xfe.
	 */
	public static function validateString(string $string):bool{

		try{
			$string = self::convertEncoding($string);
		}
		catch(Throwable $e){
			return false;
		}

		$len = strlen($string);

		// empty input or a dangling single byte cannot be a sequence of double-byte characters
		if($len < 2 || ($len % 2) !== 0){
			return false;
		}

		for($i = 0; $i < $len; $i += 2){
			$byte1 = ord($string[$i]);
			$byte2 = ord($string[($i + 1)]);

			// byte 1 unused ranges
			if($byte1 < 0xa1 || ($byte1 > 0xa9 && $byte1 < 0xb0) || $byte1 > 0xf7){
				return false;
			}

			// byte 2 unused ranges
			if($byte2 < 0xa1 || $byte2 > 0xfe){
				return false;
			}

		}

		return true;
	}

	/**
	 * Writes the segment header (4-bit mode indicator, 4-bit subset indicator, character count)
	 * followed by one 13-bit value per byte pair of the data
	 *
	 * @inheritDoc
	 *
	 * @throws \chillerlan\QRCode\Data\QRCodeDataException on an illegal character occurrence
	 */
	public function write(BitBuffer $bitBuffer, int $versionNumber):QRDataModeInterface{

		$bitBuffer
			->put(self::DATAMODE, 4)
			->put($this::GB2312_SUBSET, 4)
			->put($this->getCharCount(), $this::getLengthBits($versionNumber))
		;

		$len = strlen($this->data);

		for($i = 0; ($i + 1) < $len; $i += 2){
			// combine the byte pair into one 16-bit value (high byte first)
			$c = (((0xff & ord($this->data[$i])) << 8) | (0xff & ord($this->data[($i + 1)])));

			// shift the two accepted ranges down so that they start at zero
			if($c >= 0xa1a1 && $c <= 0xaafe){
				$c -= 0x0a1a1;
			}
			elseif($c >= 0xb0a1 && $c <= 0xfafe){
				$c -= 0x0a6a1;
			}
			else{
				throw new QRCodeDataException(sprintf('illegal char at %d [%d]', ($i + 1), $c));
			}

			// pack as (high byte * 0x60 + low byte); decodeSegment() reverses this with a division/modulo by 0x60
			$bitBuffer->put((((($c >> 8) & 0xff) * 0x060) + ($c & 0xff)), 13);
		}

		// the loop only consumes complete pairs - a remaining byte means the data had an odd length
		if($i < $len){
			throw new QRCodeDataException(sprintf('illegal char at %d', ($i + 1)));
		}

		return $this;
	}

	/**
	 * Reads a Hanzi segment from the bit buffer, starting at the subset indicator
	 * (the mode indicator is presumably consumed by the caller), and returns the
	 * decoded text in the current internal encoding
	 *
	 * See specification GBT 18284-2000
	 *
	 * @throws \chillerlan\QRCode\Data\QRCodeDataException if the subset indicator is unexpected,
	 *                                                     the buffer holds fewer bits than announced
	 *                                                     or the final encoding conversion fails
	 */
	public static function decodeSegment(BitBuffer $bitBuffer, int $versionNumber):string{

		// Hanzi mode contains a subset indicator right after mode indicator
		if($bitBuffer->read(4) !== self::GB2312_SUBSET){
			throw new QRCodeDataException('expected subset indicator for Hanzi mode');
		}

		$length = $bitBuffer->read(self::getLengthBits($versionNumber));

		if($bitBuffer->available() < ($length * 13)){
			throw new QRCodeDataException('not enough bits available');
		}

		// Each character will require 2 bytes. Read the characters as 2-byte pairs and decode as GB2312 afterwards
		$buffer = [];

		while($length > 0){
			// Each 13 bits encodes a 2-byte character
			$twoBytes          = $bitBuffer->read(13);
			$assembledTwoBytes = ((((int)($twoBytes / 0x060)) << 8) | ($twoBytes % 0x060));

			$assembledTwoBytes += ($assembledTwoBytes < 0x00a00) // 0x003BF
				? 0x0a1a1  // In the 0xA1A1 to 0xAAFE range
				: 0x0a6a1; // In the 0xB0A1 to 0xFAFE range

			$buffer[] = chr(0xff & ($assembledTwoBytes >> 8));
			$buffer[] = chr(0xff & $assembledTwoBytes);

			$length--;
		}

		// mb_internal_encoding() is called without arguments here, in which case it returns
		// the name of the current encoding (static analyzers may report its bool return type)
		$decoded = mb_convert_encoding(implode($buffer), mb_internal_encoding(), self::ENCODING);

		// same guard as in convertEncoding(): never hand a non-string result to the caller
		if(!is_string($decoded)){
			throw new QRCodeDataException('mb_convert_encoding error');
		}

		return $decoded;
	}

}
207