Passed
Push — gh-pages ( 20c441...dd59e5 )
by
unknown
02:54 queued 01:00
created

Decoder::correctErrors()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 17
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
eloc 7
c 1
b 0
f 0
nc 4
nop 2
dl 0
loc 17
rs 10
1
<?php
2
/**
3
 * Class Decoder
4
 *
5
 * @created      17.01.2021
6
 * @author       ZXing Authors
7
 * @author       Smiley <[email protected]>
8
 * @copyright    2021 Smiley
9
 * @license      Apache-2.0
10
 */
11
12
namespace chillerlan\QRCode\Decoder;
13
14
use Exception, InvalidArgumentException, RuntimeException;
15
use chillerlan\QRCode\Common\{BitBuffer, EccLevel, Mode, ReedSolomonDecoder, Version};
16
use chillerlan\QRCode\Data\{AlphaNum, Byte, ECI, Kanji, Number};
17
use chillerlan\QRCode\Detector\Detector;
18
use function count, array_fill, mb_convert_encoding, mb_detect_encoding;
19
20
/**
21
 * <p>The main class which implements QR Code decoding -- as opposed to locating and extracting
22
 * the QR Code from an image.</p>
23
 *
24
 * @author Sean Owen
25
 */
26
final class Decoder{
27
28
#	private const GB2312_SUBSET = 1;
29
30
	/**
	 * Decodes the QR Code contained in the given luminance source.
	 *
	 * The source is binarized, the QR symbol is detected and the resulting bit matrix is
	 * parsed. If the regular reading fails, a second attempt is made on a mirrored copy
	 * of the matrix. When both attempts fail, the exception of the first (regular)
	 * attempt is thrown, as it is the more meaningful one.
	 *
	 * @param \chillerlan\QRCode\Decoder\LuminanceSource $source
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult text and bytes encoded within the QR Code
	 * @throws \Exception if the QR Code cannot be decoded
	 */
	public function decode(LuminanceSource $source):DecoderResult{
		$matrix    = (new Binarizer($source))->getBlackMatrix();
		$bitMatrix = (new Detector($matrix))->detect();

		try{
			// regular reading; clone the BitMatrix so that the mirrored attempt
			// below starts from an unmodified matrix
			return $this->decodeParser(new BitMatrixParser(clone $bitMatrix));
		}
		catch(Exception $regularReadException){
			// remembered and re-thrown below if the mirrored reading fails as well
		}

		try{
			$parser = new BitMatrixParser(clone $bitMatrix);

			// read version and format information mirrored...
			$parser->setMirror(true);
			// ...and prepare the matrix for a mirrored reading of the content
			$parser->mirror();

			return $this->decodeParser($parser);
		}
		catch(Exception $mirroredReadException){
			// the mirrored attempt is only a fallback: report the original failure
			throw $regularReadException;
		}

	}
87
88
	/**
	 * Runs the full decoding pipeline on a prepared parser: reads version and ECC level,
	 * reads the raw codewords, splits them into data blocks, error-corrects each block
	 * and decodes the concatenated data bytes.
	 *
	 * @param \chillerlan\QRCode\Decoder\BitMatrixParser $parser
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult
	 */
	private function decodeParser(BitMatrixParser $parser):DecoderResult{
		$qrVersion = $parser->readVersion();
		$level     = $parser->readFormatInformation()->getErrorCorrectionLevel();

		// raw codewords, de-interleaved into [number of data codewords, codeword bytes] pairs
		$blocks = $this->getDataBlocks($parser->readCodewords(), $qrVersion, $level);

		// error-correct each block and append its data codewords to one continuous byte stream
		$stream = [];

		foreach($blocks as [$dataLength, $blockBytes]){
			$fixed = $this->correctErrors($blockBytes, $dataLength);

			for($pos = 0; $pos < $dataLength; $pos++){
				$stream[] = $fixed[$pos];
			}
		}

		// finally interpret the byte stream
		return $this->decodeBitStream($stream, $qrVersion, $level);
	}
119
120
	/**
	 * De-interleaves the raw codewords of a QR Code into its original blocks.
	 *
	 * When a QR Code uses several blocks, the first byte of every block is written first,
	 * then the second byte of every block, and so on. This method reverses that layout.
	 *
	 * @param array                              $rawCodewords bytes as read directly from the QR Code
	 * @param \chillerlan\QRCode\Common\Version  $version      version of the QR Code
	 * @param \chillerlan\QRCode\Common\EccLevel $eccLevel     error-correction level of the QR Code
	 *
	 * @return array list of [number of data codewords, codewords of the block (data followed by ECC)]
	 * @throws \InvalidArgumentException if the codeword count does not match the version
	 */
	private function getDataBlocks(array $rawCodewords, Version $version, EccLevel $eccLevel):array{

		if($version->getTotalCodewords() !== count($rawCodewords)){
			throw new InvalidArgumentException('$rawCodewords differ from total codewords for version');
		}

		// ECC codewords per block and the block groups for this version/level
		[$eccPerBlock, $blockGroups] = $version->getRSBlocks($eccLevel);

		// allocate zero-filled blocks of the appropriate size
		$blocks = [];

		foreach($blockGroups as [$groupSize, $dataPerBlock]){
			$blockLength = $eccPerBlock + $dataPerBlock;

			for($n = 0; $n < $groupSize; $n++){
				$blocks[] = [$dataPerBlock, array_fill(0, $blockLength, 0)];
			}
		}

		$blockCount = count($blocks);

		// All blocks hold the same amount of data, except that the last n (n may be 0)
		// hold one more byte. Scan from the end to find the index of the first longer block.
		/** @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset */
		$shortLength = count($blocks[0][1]);
		$firstLong   = $blockCount;

		while($firstLong > 0 && count($blocks[$firstLong - 1][1]) !== $shortLength){
			$firstLong--;
		}

		$shortDataLength = $shortLength - $eccPerBlock;
		$cursor          = 0;

		// data codewords that every block has
		for($pos = 0; $pos < $shortDataLength; $pos++){
			for($b = 0; $b < $blockCount; $b++){
				$blocks[$b][1][$pos] = $rawCodewords[$cursor];
				$cursor++;
			}
		}

		// the one extra data codeword of each longer block
		for($b = $firstLong; $b < $blockCount; $b++){
			$blocks[$b][1][$shortDataLength] = $rawCodewords[$cursor];
			$cursor++;
		}

		// error correction codewords; in the longer blocks they sit one position further back
		for($pos = $shortDataLength; $pos < $shortLength; $pos++){
			for($b = 0; $b < $blockCount; $b++){
				$target = $b >= $firstLong ? $pos + 1 : $pos;

				$blocks[$b][1][$target] = $rawCodewords[$cursor];
				$cursor++;
			}
		}

		return $blocks;
	}
201
202
	/**
	 * Given the data and error-correction codewords of one block, possibly corrupted,
	 * runs Reed-Solomon error correction and returns the codewords with the data part
	 * replaced by the decoder output.
	 *
	 * The array is passed by value, so the caller's array is not modified; only the first
	 * $numDataCodewords entries of the returned array are replaced, the error-correction
	 * codewords are returned as received.
	 *
	 * @param array $codewordBytes    data codewords followed by error-correction codewords
	 * @param int   $numDataCodewords number of leading data codewords in $codewordBytes
	 *
	 * @return array
	 */
	private function correctErrors(array $codewordBytes, int $numDataCodewords):array{
		$numEccCodewords = count($codewordBytes) - $numDataCodewords;

		// clamp every codeword to the 0-255 range before handing it to the decoder
		$masked = [];

		foreach($codewordBytes as $key => $value){
			$masked[$key] = $value & 0xFF;
		}

		$repaired = (new ReedSolomonDecoder)->decode($masked, $numEccCodewords);

		// only the data codewords are of interest, errors in the ECC part are ignored
		$pos = 0;

		while($pos < $numDataCodewords){
			$codewordBytes[$pos] = $repaired[$pos];
			$pos++;
		}

		return $codewordBytes;
	}
224
225
	/**
	 * Decodes the error-corrected data bytes of a QR Code into its text content.
	 *
	 * The stream is read segment by segment: a 4 bit mode indicator followed by the
	 * mode specific payload. Reading stops at a terminator or when fewer than 4 bits
	 * are left.
	 *
	 * @param array                              $bytes    the error-corrected data bytes
	 * @param \chillerlan\QRCode\Common\Version  $version  version of the QR Code
	 * @param \chillerlan\QRCode\Common\EccLevel $ecLevel  error-correction level of the QR Code
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult
	 * @throws \RuntimeException on a truncated structured append header, an undetectable
	 *                           ECI encoding or an unsupported data mode
	 */
	private function decodeBitStream(array $bytes, Version $version, EccLevel $ecLevel):DecoderResult{
		$bits           = new BitBuffer($bytes);
		$symbolSequence = -1;
		$parityData     = -1;
		$versionNumber  = $version->getVersionNumber();

		$result     = '';
		$eciCharset = null;

		// While still another segment to read...
		while($bits->available() >= 4){
			$datamode = $bits->read(4); // mode is encoded by 4 bits

			// OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
			if($datamode === Mode::DATA_TERMINATOR){
				break;
			}

			if($datamode === Mode::DATA_ECI){
				// Count doesn't apply to ECI; the charset is applied to the next byte segment
				$eciCharset = ECI::parseValue($bits);
			}
			/** @noinspection PhpStatementHasEmptyBodyInspection */
			elseif($datamode === Mode::DATA_FNC1_FIRST || $datamode === Mode::DATA_FNC1_SECOND){
				// @todo FNC1 is recognized but has no effect on the parsed result yet
				// (see section 6.4.8.1, 6.4.8.2: "%%" -> "%", "%" -> FNC1 separator 0x1D in alphanumeric mode)
			}
			elseif($datamode === Mode::DATA_STRCTURED_APPEND){

				if($bits->available() < 16){
					throw new RuntimeException('structured append: not enough bits left');
				}

				// sequence number and parity are added to the result metadata
				// Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
				$symbolSequence = $bits->read(8);
				$parityData     = $bits->read(8);
			}
			// "Normal" QR code modes:
			// @todo Hanzi mode (GB2312 subset) is not supported and ends up as invalid data mode
			elseif($datamode === Mode::DATA_NUMBER){
				$result .= Number::decodeSegment($bits, $versionNumber);
			}
			elseif($datamode === Mode::DATA_ALPHANUM){
				$result .= AlphaNum::decodeSegment($bits, $versionNumber);
			}
			elseif($datamode === Mode::DATA_BYTE){
				$str = Byte::decodeSegment($bits, $versionNumber);

				if($eciCharset !== null){
					$encoding = $eciCharset->getName();

					if($encoding === null){
						// The spec isn't clear on this mode; see
						// section 6.4.5: it does not say which encoding to assume
						// upon decoding. I have seen ISO-8859-1 used as well as
						// Shift_JIS -- without anything like an ECI designator to
						// give a hint.
						$encoding = mb_detect_encoding($str, ['ISO-8859-1', 'SJIS', 'UTF-8']);

						// mb_detect_encoding() returns false when no candidate matches
						if($encoding === false){
							throw new RuntimeException('could not determine encoding in ECI mode');
						}
					}

					// the ECI designator only applies to the segment that follows it
					$eciCharset = null;
					// the segment is encoded in $encoding: convert *from* it to the internal encoding
					$str = mb_convert_encoding($str, \mb_internal_encoding(), $encoding);
				}

				$result .= $str;
			}
			elseif($datamode === Mode::DATA_KANJI){
				$result .= Kanji::decodeSegment($bits, $versionNumber);
			}
			else{
				throw new RuntimeException('invalid data mode');
			}
		}

		return new DecoderResult($bytes, $result, $version, $ecLevel, $symbolSequence, $parityData);
	}
336
337
}
338