Passed
Push — main ( 48b6c2...5c2c92 )
by smiley
02:24
created

Decoder::getDataBlocks()   C

Complexity

Conditions 12
Paths 217

Size

Total Lines 68
Code Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 12
eloc 30
c 1
b 0
f 0
nc 217
nop 3
dl 0
loc 68
rs 5.9208

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * Class Decoder
4
 *
5
 * @created      17.01.2021
6
 * @author       ZXing Authors
7
 * @author       Smiley <[email protected]>
8
 * @copyright    2021 Smiley
9
 * @license      Apache-2.0
10
 */
11
12
namespace chillerlan\QRCode\Decoder;
13
14
use InvalidArgumentException, RuntimeException, Throwable;
15
use chillerlan\QRCode\Common\{BitBuffer, EccLevel, Mode, ReedSolomonDecoder, Version};
16
use chillerlan\QRCode\Data\{AlphaNum, Byte, ECI, Kanji, Number};
17
use chillerlan\QRCode\Detector\Detector;
18
use function count, array_fill, mb_convert_encoding, mb_detect_encoding, mb_internal_encoding;
19
20
/**
21
 * <p>The main class which implements QR Code decoding -- as opposed to locating and extracting
22
 * the QR Code from an image.</p>
23
 *
24
 * @author Sean Owen
25
 */
26
final class Decoder{
27
28
#	private const GB2312_SUBSET = 1;
29
30
	/**
	 * Locates a QR Code in the given luminance source and decodes its content.
	 *
	 * The detected matrix is read as-is first; if that fails for any reason, a second
	 * attempt is made on the mirrored matrix. When both attempts fail, the error of
	 * the first (non-mirrored) attempt is the one that gets reported.
	 *
	 * @param \chillerlan\QRCode\Decoder\LuminanceSourceInterface $source
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult text and bytes encoded within the QR Code
	 * @throws \Throwable if the QR Code cannot be decoded
	 */
	public function decode(LuminanceSourceInterface $source):DecoderResult{
		$detected = (new Detector($source))->detect();

		try{
			// read from a copy, the detected matrix is still needed for the mirrored retry
			return $this->decodeMatrix(clone $detected);
		}
		catch(Throwable $regularFailure){
			// the regular reading failed - continue with the mirrored attempt below
		}

		try{
			// a failed regular reading may hint at a mirrored QR Code, so try once more on the mirrored matrix
			return $this->decodeMatrix($detected->setMirror(true)->mirror());
		}
		catch(Throwable $mirroredFailure){
			// the mirrored reading failed as well: report the error of the regular reading
			throw $regularFailure;
		}

	}
67
68
	/**
	 * Decodes a single bit matrix: reads the raw codewords, splits them into data blocks,
	 * error-corrects each block and parses the joined data codewords.
	 *
	 * @param \chillerlan\QRCode\Decoder\BitMatrix $bitMatrix
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult
	 * @throws \RuntimeException if version or format information are not available
	 */
	private function decodeMatrix(BitMatrix $bitMatrix):DecoderResult{
		// the codewords are read first, version and format information are fetched afterwards
		$codewords = $bitMatrix->readCodewords();
		$version   = $bitMatrix->getVersion();
		$format    = $bitMatrix->getFormatInfo();

		// technically this shouldn't happen as the respective read methods would throw first
		if($format === null || $version === null){
			throw new RuntimeException('unable to read version or ecc level');
		}

		$eccLevel = $format->getErrorCorrectionLevel();
		$stream   = [];

		// each de-interleaved block is error-corrected, then only its data codewords are appended to the stream
		foreach($this->getDataBlocks($codewords, $version, $eccLevel) as [$dataLength, $blockBytes]){
			$fixed = $this->correctErrors($blockBytes, $dataLength);

			for($k = 0; $k < $dataLength; $k++){
				$stream[] = $fixed[$k];
			}
		}

		// parse the segments contained in the resulting byte stream
		return $this->decodeBitStream($stream, $version, $eccLevel);
	}
106
107
	/**
	 * Splits the interleaved codewords of a QR Code back into its separate blocks.
	 *
	 * QR Codes with several blocks store them interleaved: first byte of every block, then the
	 * second byte of every block, and so on. Each returned element is a 2-element array of
	 * [number of data codewords, codewords of the block (data followed by error correction)].
	 *
	 * @param array                              $rawCodewords bytes as read directly from the QR Code
	 * @param \chillerlan\QRCode\Common\Version  $version      version of the QR Code
	 * @param \chillerlan\QRCode\Common\EccLevel $eccLevel     error-correction level of the QR Code
	 *
	 * @return array the "de-interleaved" blocks
	 * @throws \InvalidArgumentException if the number of codewords does not match the version
	 */
	private function getDataBlocks(array $rawCodewords, Version $version, EccLevel $eccLevel):array{

		if($version->getTotalCodewords() !== count($rawCodewords)){
			throw new InvalidArgumentException('$rawCodewords differ from total codewords for version');
		}

		// number of error correction codewords per block and the block groups for this version/level
		[$eccLength, $blockGroups] = $version->getRSBlocks($eccLevel);

		// allocate one zero-filled buffer per block, sized for data plus error correction codewords
		$blocks = [];

		foreach($blockGroups as [$groupSize, $dataLength]){
			for($n = 0; $n < $groupSize; $n++){
				$blocks[] = [$dataLength, array_fill(0, $eccLength + $dataLength, 0)];
			}
		}

		$blockCount = count($blocks);

		// All blocks hold the same amount of codewords, except that the last n (n may be 0)
		// hold one more: walk backwards to find the index of the first longer block.
		/** @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset */
		$shortTotal  = count($blocks[0][1]);
		$longerStart = $blockCount;

		while($longerStart > 0 && count($blocks[$longerStart - 1][1]) !== $shortTotal){
			$longerStart--;
		}

		$shortDataLength = $shortTotal - $eccLength;
		$offset          = 0;

		// data codewords that exist in every block
		for($pos = 0; $pos < $shortDataLength; $pos++){
			for($b = 0; $b < $blockCount; $b++){
				$blocks[$b][1][$pos] = $rawCodewords[$offset++];
			}
		}

		// the one extra data codeword of each longer block
		for($b = $longerStart; $b < $blockCount; $b++){
			$blocks[$b][1][$shortDataLength] = $rawCodewords[$offset++];
		}

		// error correction codewords, shifted by one position in the longer blocks
		/** @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset */
		$limit = count($blocks[0][1]);

		for($pos = $shortDataLength; $pos < $limit; $pos++){
			for($b = 0; $b < $blockCount; $b++){
				$target = $pos;

				if($b >= $longerStart){
					$target++;
				}

				$blocks[$b][1][$target] = $rawCodewords[$offset++];
			}
		}

		return $blocks;
	}
188
189
	/**
	 * Runs Reed-Solomon error correction over the codewords of one block.
	 *
	 * The array is passed by value, so the caller's array is left untouched; the returned
	 * copy has its first $numDataCodewords elements replaced with the decoder output.
	 *
	 * @param array $codewordBytes    data and error correction codewords of the block
	 * @param int   $numDataCodewords number of leading data codewords in $codewordBytes
	 *
	 * @return array the codewords with the data part replaced by the decoded values
	 */
	private function correctErrors(array $codewordBytes, int $numDataCodewords):array{
		// everything after the data codewords is error correction
		$eccLength = count($codewordBytes) - $numDataCodewords;
		$masked    = [];

		// reduce every codeword to its lowest 8 bits
		foreach($codewordBytes as $key => $value){
			$masked[$key] = $value & 0xFF;
		}

		$repaired = (new ReedSolomonDecoder)->decode($masked, $eccLength);
		$pos      = 0;

		// only the data codewords are taken over, errors in the error correction part don't matter
		while($pos < $numDataCodewords){
			$codewordBytes[$pos] = $repaired[$pos];
			$pos++;
		}

		return $codewordBytes;
	}
211
212
	/**
	 * Parses the error-corrected data codewords segment by segment into the decoded text.
	 *
	 * Reading stops when fewer than 4 bits are left or a terminator mode indicator is found.
	 * Byte segments that follow an ECI designator are converted from the designated charset
	 * to the internal encoding; the designator applies to the next byte segment only.
	 *
	 * @param array                              $bytes   the error-corrected data codewords
	 * @param \chillerlan\QRCode\Common\Version  $version version of the QR Code
	 * @param \chillerlan\QRCode\Common\EccLevel $ecLevel error-correction level of the QR Code
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult
	 * @throws \RuntimeException on an invalid data mode, a truncated structured append header
	 *                           or when the encoding of an ECI byte segment cannot be determined
	 */
	private function decodeBitStream(array $bytes, Version $version, EccLevel $ecLevel):DecoderResult{
		$bits           = new BitBuffer($bytes);
		$symbolSequence = -1;
		$parityData     = -1;
		$versionNumber  = $version->getVersionNumber();

		$result     = '';
		$eciCharset = null;

		// While still another segment to read...
		while($bits->available() >= 4){
			$datamode = $bits->read(4); // mode is encoded by 4 bits

			// OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
			if($datamode === Mode::TERMINATOR){
				break;
			}

			if($datamode === Mode::ECI){
				// Count doesn't apply to ECI
				$eciCharset = ECI::parseValue($bits);
			}
			elseif($datamode === Mode::FNC1_FIRST || $datamode === Mode::FNC1_SECOND){
				// FNC1 post-processing is not implemented: the indicator is consumed and ignored
				continue;
			}
			elseif($datamode === Mode::STRCTURED_APPEND){

				if($bits->available() < 16){
					throw new RuntimeException('structured append: not enough bits left');
				}

				// sequence number and parity are added later to the result metadata
				// Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
				$symbolSequence = $bits->read(8);
				$parityData     = $bits->read(8);
			}
			elseif($datamode === Mode::NUMBER){
				$result .= Number::decodeSegment($bits, $versionNumber);
			}
			elseif($datamode === Mode::ALPHANUM){
				$result .= AlphaNum::decodeSegment($bits, $versionNumber);
			}
			elseif($datamode === Mode::BYTE){
				$str = Byte::decodeSegment($bits, $versionNumber);

				if($eciCharset !== null){
					$encoding = $eciCharset->getName();

					if($encoding === null){
						// The spec isn't clear on this mode; see section 6.4.5: it does not say
						// which encoding to assume upon decoding. I have seen ISO-8859-1 used
						// as well as Shift_JIS -- without anything like an ECI designator to
						// give a hint.
						$encoding = mb_detect_encoding($str, ['ISO-8859-1', 'SJIS', 'UTF-8']);

						// mb_detect_encoding() returns false when none of the candidates match
						if($encoding === false){
							throw new RuntimeException('could not determine encoding in ECI mode');
						}
					}

					// the designator has been consumed by this segment
					$eciCharset = null;
					// the segment bytes are encoded in $encoding: convert *from* it to the internal encoding
					$str = mb_convert_encoding($str, mb_internal_encoding(), $encoding);
				}

				$result .= $str;
			}
			elseif($datamode === Mode::KANJI){
				$result .= Kanji::decodeSegment($bits, $versionNumber);
			}
			else{
				// any other mode indicator is rejected
				throw new RuntimeException('invalid data mode');
			}

		}

		return new DecoderResult([
			'rawBytes'                 => $bytes,
			'text'                     => $result,
			'version'                  => $version,
			'eccLevel'                 => $ecLevel,
			'structuredAppendParity'   => $parityData,
			'structuredAppendSequence' => $symbolSequence
		]);
	}
330
331
}
332