Passed
Push — main ( 426c16...9e04d2 )
by smiley
01:56
created

Kanji::convertEncoding()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 20
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 10
nc 4
nop 1
dl 0
loc 20
rs 9.9332
c 0
b 0
f 0
1
<?php
2
/**
3
 * Class Kanji
4
 *
5
 * @created      25.11.2015
6
 * @author       Smiley <[email protected]>
7
 * @copyright    2015 Smiley
8
 * @license      MIT
9
 */
10
11
namespace chillerlan\QRCode\Data;
12
13
use chillerlan\QRCode\Common\{BitBuffer, Mode};
14
15
use function chr, implode, is_string, mb_convert_encoding, mb_detect_encoding,
16
	mb_detect_order, mb_internal_encoding, mb_strlen, ord, sprintf, strlen;
17
18
/**
19
 * Kanji mode: double-byte characters from the Shift-JIS character set
20
 *
21
 * ISO/IEC 18004:2000 Section 8.3.5
22
 * ISO/IEC 18004:2000 Section 8.4.5
23
 *
24
 * @see https://en.wikipedia.org/wiki/Shift_JIS#As_defined_in_JIS_X_0208:1997
25
 * @see http://www.rikai.com/library/kanjitables/kanji_codes.sjis.shtml
26
 * @see https://gist.github.com/codemasher/d07d3e6e9346c08e7a41b8b978784952
27
 */
28
final class Kanji extends QRDataModeAbstract{
29
30
	// encoding name handed to the mbstring functions in this class; known candidates: SJIS, SJIS-2004
31
	// SJIS-2004 may produce errors in PHP < 8
32
	public const sjisEncoding = 'SJIS';
33
34
	/**
35
	 * The data mode of this class (Mode::KANJI)
	 *
	 * @inheritDoc
36
	 */
37
	protected static int $datamode = Mode::KANJI;
38
39
	/**
	 * Returns the length of the segment data in Shift-JIS characters (not in bytes).
	 *
	 * @inheritDoc
	 */
	protected function getCharCount():int{
		$characters = mb_strlen($this->data, self::sjisEncoding);

		return $characters;
	}
45
46
	/**
	 * Returns the size of the encoded data in bits: 13 bits for each character.
	 *
	 * @inheritDoc
	 */
	public function getLengthInBits():int{
		$bitsPerCharacter = 13;

		return $bitsPerCharacter * $this->getCharCount();
	}
52
53
	/**
	 * Converts the given string to Shift-JIS (self::sjisEncoding).
	 *
	 * The source encoding is detected in strict mode from the candidates ASCII, the current
	 * internal encoding, UTF-8, SJIS and SJIS-2004 (in that order). The candidate list is
	 * handed directly to mb_detect_encoding() instead of being installed via mb_detect_order(),
	 * so calling this method does not alter the process-wide mbstring detect order.
	 *
	 * @inheritDoc
	 *
	 * @throws \chillerlan\QRCode\Data\QRCodeDataException if the encoding cannot be detected or the conversion fails
	 */
	public static function convertEncoding(string $string):string{
		$candidates = ['ASCII', mb_internal_encoding(), 'UTF-8', 'SJIS', 'SJIS-2004'];
		$detected   = mb_detect_encoding($string, $candidates, true);

		if($detected === false){
			throw new QRCodeDataException('mb_detect_encoding error');
		}

		// already in the target encoding, nothing to convert
		if($detected === self::sjisEncoding){
			return $string;
		}

		$string = mb_convert_encoding($string, self::sjisEncoding, $detected);

		// mb_convert_encoding() may return false on failure
		if(!is_string($string)){
			throw new QRCodeDataException(sprintf('invalid encoding: %s', $detected));
		}

		return $string;
	}
77
78
	/**
	 * checks if a string qualifies as SJIS Kanji
	 *
	 * The input is converted to Shift-JIS first. It qualifies if it is not empty, consists of
	 * byte pairs only, and every pair has a lead byte in 0x81-0x84, 0x88-0x9f or 0xe0-0xea
	 * and a trail byte in 0x40-0xfc other than 0x7f.
	 *
	 * @throws \chillerlan\QRCode\Data\QRCodeDataException if the encoding conversion fails
	 */
	public static function validateString(string $string):bool{

		// an empty string cannot be Kanji, skip the encoding detection altogether
		if($string === ''){
			return false;
		}

		$string = self::convertEncoding($string);
		$len    = strlen($string);

		// every character must occupy exactly 2 bytes
		if($len < 2 || $len % 2 !== 0){
			return false;
		}

		for($i = 0; $i < $len; $i += 2){
			$byte1 = ord($string[$i]);
			$byte2 = ord($string[$i + 1]);

			// byte 1 unused and vendor ranges
			if($byte1 < 0x81 || ($byte1 > 0x84 && $byte1 < 0x88) || ($byte1 > 0x9f && $byte1 < 0xe0) || $byte1 > 0xea){
				return false;
			}

			// byte 2 unused ranges
			if($byte2 < 0x40 || $byte2 === 0x7f || $byte2 > 0xfc){
				return false;
			}

			// There is intentionally no check that ties the range of byte 2 to the parity of byte 1:
			// each Shift-JIS lead byte covers two JIS X 0208 rows, the odd row uses the trail bytes
			// 0x40-0x9e and the even row uses 0x9f-0xfc. Both halves are therefore valid for every
			// lead byte, e.g. 0x82 0x60 (fullwidth "A") and 0x82 0xa0 (hiragana "a").
		}

		return true;
	}
123
124
	/**
	 * Writes the segment to the bit buffer: the 4 bit mode indicator, the character count
	 * (in the number of bits given by getLengthBits() for the version), then 13 bits for
	 * each double-byte character.
	 *
	 * @inheritDoc
	 *
	 * @throws \chillerlan\QRCode\Data\QRCodeDataException on an illegal character occurrence
	 */
	public function write(BitBuffer $bitBuffer, int $versionNumber):void{

		$bitBuffer
			->put($this::$datamode, 4)
			->put($this->getCharCount(), $this::getLengthBits($versionNumber))
		;

		$length = strlen($this->data);
		$pos    = 0;

		// consume the data in pairs of bytes
		while($pos + 1 < $length){
			$high  = ord($this->data[$pos]);
			$low   = ord($this->data[$pos + 1]);
			$value = ($high << 8) | $low;

			// shift both accepted ranges down so that they become contiguous
			if($value >= 0x8140 && $value <= 0x9ffc){
				$value -= 0x8140;
			}
			elseif($value >= 0xe040 && $value <= 0xebbf){
				$value -= 0xc140;
			}
			else{
				throw new QRCodeDataException(sprintf('illegal char at %d [%d]', $pos + 1, $value));
			}

			// pack as (high byte * 0xc0) + low byte into 13 bits
			$bitBuffer->put(((($value >> 8) & 0xff) * 0xc0) + ($value & 0xff), 13);

			$pos += 2;
		}

		// a single trailing byte is left over when the data length is odd
		if($pos < $length){
			throw new QRCodeDataException(sprintf('illegal char at %d', $pos + 1));
		}

	}
159
160
	/**
	 * Reads a Kanji mode segment from the bit buffer and returns the decoded string,
	 * converted from Shift-JIS to the current internal encoding.
	 *
	 * @inheritDoc
	 *
	 * @throws \chillerlan\QRCode\Data\QRCodeDataException if the buffer holds fewer than 13 bits per announced character
	 */
	public static function decodeSegment(BitBuffer $bitBuffer, int $versionNumber):string{
		$charCount = $bitBuffer->read(self::getLengthBits($versionNumber));

		if($charCount * 13 > $bitBuffer->available()){
			throw new QRCodeDataException('not enough bits available');  // @codeCoverageIgnore
		}

		// Each character requires 2 bytes: collect the raw byte pairs and decode them as SJIS afterwards
		$sjis = '';

		for($remaining = $charCount; $remaining > 0; $remaining--){
			// each 13 bit value holds one 2-byte character, packed as (high byte * 0xc0) + low byte
			$packed = $bitBuffer->read(13);
			$code   = ((int)($packed / 0xc0) << 8) | ($packed % 0xc0);

			// add back the offset of the range the character was taken from
			if($code < 0x1f00){
				$code += 0x8140; // In the 0x8140 to 0x9FFC range
			}
			else{
				$code += 0xc140; // In the 0xE040 to 0xEBBF range
			}

			$sjis .= chr(($code >> 8) & 0xff).chr($code & 0xff);
		}

		return mb_convert_encoding($sjis, mb_internal_encoding(), self::sjisEncoding);
	}
193
194
}
195