Passed
Push — v5 ( 93618e...84eb31 )
by smiley
01:52
created

Decoder::getDataBlocks()   C

Complexity

Conditions 12
Paths 217

Size

Total Lines 66
Code Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 12
eloc 30
c 1
b 0
f 0
nc 217
nop 3
dl 0
loc 66
rs 5.9208

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * Class Decoder
4
 *
5
 * @created      17.01.2021
6
 * @author       ZXing Authors
7
 * @author       Smiley <[email protected]>
8
 * @copyright    2021 Smiley
9
 * @license      Apache-2.0
10
 */
11
12
namespace chillerlan\QRCode\Decoder;
13
14
use Exception, InvalidArgumentException, RuntimeException;
15
use chillerlan\QRCode\Common\{BitBuffer, EccLevel, ECICharset, Mode, ReedSolomonDecoder, Version};
16
use chillerlan\QRCode\Data\{AlphaNum, Byte, ECI, Kanji, Number};
17
use chillerlan\QRCode\Detector\Detector;
18
use function count, array_fill, mb_convert_encoding, mb_detect_encoding;
19
20
/**
21
 * <p>The main class which implements QR Code decoding -- as opposed to locating and extracting
22
 * the QR Code from an image.</p>
23
 *
24
 * @author Sean Owen
25
 */
26
final class Decoder{
27
28
#	private const GB2312_SUBSET = 1;
29
30
	/**
	 * Decodes the QR Code contained in the given luminance source.
	 *
	 * The source is binarized and passed through the detector; the detected matrix is then decoded
	 * as-is. If that attempt throws, a second attempt is made with a parser that is switched to
	 * mirrored reading. If the mirrored attempt fails as well, the exception of the first
	 * (non-mirrored) attempt is the one that gets re-thrown.
	 *
	 * @param \chillerlan\QRCode\Decoder\LuminanceSource $source
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult text and bytes encoded within the QR Code
	 * @throws \Exception if the QR Code cannot be decoded
	 */
	public function decode(LuminanceSource $source):DecoderResult{
		$matrix    = (new Binarizer($source))->getBlackMatrix();
		$bitMatrix = (new Detector($matrix))->detect();

		try{
			// every attempt works on its own clone of the BitMatrix,
			// so the mirrored retry below is not affected by the first reading
			return $this->decodeParser(new BitMatrixParser(clone $bitMatrix));
		}
		catch(Exception $original){

			try{
				$parser = new BitMatrixParser(clone $bitMatrix);

				// Will be attempting a mirrored reading of the version and format info.
				$parser->setMirror(true);
				// Prepare for a mirrored reading.
				$parser->mirror();

				return $this->decodeParser($parser);
			}
			catch(Exception $mirrored){
				// Throw the exception from the original reading
				throw $original;
			}

		}

	}
87
88
	/**
	 * Runs the decoding pipeline on an already configured parser.
	 *
	 * Reads the version and error correction level, fetches the raw codewords, separates them
	 * into data blocks, error-corrects each block and decodes the concatenated data codewords.
	 *
	 * @param \chillerlan\QRCode\Decoder\BitMatrixParser $parser
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult
	 */
	private function decodeParser(BitMatrixParser $parser):DecoderResult{
		$version  = $parser->readVersion();
		$eccLevel = $parser->readFormatInformation()->getErrorCorrectionLevel();

		// raw codewords, separated into their data blocks
		$blocks = $this->getDataBlocks($parser->readCodewords(), $version, $eccLevel);
		$stream = [];

		// error-correct each block and append its data codewords to one continuous stream
		foreach($blocks as [$dataLength, $codewords]){
			$fixed = $this->correctErrors($codewords, $dataLength);

			for($k = 0; $k < $dataLength; $k++){
				$stream[] = $fixed[$k];
			}
		}

		// decode the contents of that stream
		return $this->decodeBitStream($stream, $version, $eccLevel);
	}
119
120
	/**
	 * Separates the interleaved codewords of a QR Code into their original blocks.
	 *
	 * With multiple blocks, the symbol holds the first codeword of block 1 to n, then the second
	 * codeword of each block and so on. The blocks at the end of the list may carry one data
	 * codeword more than the others; the error correction codewords follow after the data.
	 *
	 * @param array                              $rawCodewords bytes as read directly from the QR Code
	 * @param \chillerlan\QRCode\Common\Version  $version      version of the QR Code
	 * @param \chillerlan\QRCode\Common\EccLevel $eccLevel     error-correction level of the QR Code
	 *
	 * @return array one [count, codewords] pair per block: "count" is the per-block value from the
	 *               RS block table (decodeParser() consumes it as the number of data codewords),
	 *               "codewords" are the de-interleaved codewords of that block
	 * @throws \InvalidArgumentException if the number of raw codewords does not match the version
	 */
	private function getDataBlocks(array $rawCodewords, Version $version, EccLevel $eccLevel):array{

		if(count($rawCodewords) !== $version->getTotalCodewords()){
			throw new InvalidArgumentException('$rawCodewords differ from total codewords for version');
		}

		// number and size of the blocks used by this version and error correction level
		[$eccLength, $groups] = $version->getRSBlocks($eccLevel);

		// set up one zero-filled codeword buffer per block
		$blocks = [];

		foreach($groups as [$repeat, $perBlock]){
			for($n = 0; $n < $repeat; $n++){
				$blocks[] = [$perBlock, array_fill(0, $eccLength + $perBlock, 0)];
			}
		}

		$blockCount  = count($blocks);
		$shortLength = count($blocks[0][1]);

		// scan from the end for the last block of the shorter size -
		// the block after it (if any) is the first of the longer ones
		$firstLong = $blockCount;

		while($firstLong > 0 && count($blocks[$firstLong - 1][1]) !== $shortLength){
			$firstLong--;
		}

		$shortDataLength = $shortLength - $eccLength;
		$cursor          = 0;

		// the data codewords that all of the blocks have in common
		for($pos = 0; $pos < $shortDataLength; $pos++){
			for($b = 0; $b < $blockCount; $b++){
				$blocks[$b][1][$pos] = $rawCodewords[$cursor++];
			}
		}

		// the one additional data codeword of the longer blocks
		for($b = $firstLong; $b < $blockCount; $b++){
			$blocks[$b][1][$shortDataLength] = $rawCodewords[$cursor++];
		}

		// the error correction codewords, moved up by one position in the longer blocks
		$eccEnd = count($blocks[0][1]);

		for($pos = $shortDataLength; $pos < $eccEnd; $pos++){
			for($b = 0; $b < $blockCount; $b++){
				$target                 = $pos + ($b < $firstLong ? 0 : 1);
				$blocks[$b][1][$target] = $rawCodewords[$cursor++];
			}
		}

		return $blocks;
	}
199
200
	/**
	 * Applies Reed-Solomon error correction to the codewords of a single block.
	 *
	 * The array is received by value: the caller's copy is not modified, the corrected
	 * codewords are handed back through the return value.
	 *
	 * @param array $codewordBytes    data and error-correction codewords as received, possibly corrupted
	 * @param int   $numDataCodewords number of leading codewords that are data
	 *
	 * @return array the given codewords with the first $numDataCodewords entries replaced by the decoder output
	 */
	private function correctErrors(array $codewordBytes, int $numDataCodewords):array{
		// mask every codeword down to its lowest 8 bits (array keys are kept)
		$ints = array_map(
			static function($codeword){
				return $codeword & 0xFF;
			},
			$codewordBytes
		);

		$eccLength = count($codewordBytes) - $numDataCodewords;
		$decoded   = (new ReedSolomonDecoder)->decode($ints, $eccLength);

		// take over the data part only - errors in the error-correction codewords are of no interest
		$pos = 0;

		while($pos < $numDataCodewords){
			$codewordBytes[$pos] = $decoded[$pos];
			$pos++;
		}

		return $codewordBytes;
	}
222
223
	/**
	 * Decodes the error-corrected data codewords into the text content of the QR Code.
	 *
	 * The stream consists of segments that are each introduced by a 4 bit mode indicator. Reading
	 * stops at a terminator or as soon as fewer than 4 bits are left. Number, alphanumeric, byte and
	 * kanji segments are appended to the result; ECI and structured append segments only supply
	 * metadata. FNC1 indicators are recognized but have no effect, Hanzi mode is not handled and
	 * ends up as an invalid data mode.
	 *
	 * @param array                              $bytes    the error-corrected data codewords
	 * @param \chillerlan\QRCode\Common\Version  $version  version of the QR Code
	 * @param \chillerlan\QRCode\Common\EccLevel $ecLevel  error-correction level of the QR Code
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult
	 * @throws \RuntimeException on an invalid mode indicator, an incomplete structured append header
	 *                           or when the character set of an ECI byte segment cannot be determined
	 */
	private function decodeBitStream(array $bytes, Version $version, EccLevel $ecLevel):DecoderResult{
		$bits           = new BitBuffer($bytes);
		$symbolSequence = -1;
		$parityData     = -1;
		$versionNumber  = $version->getVersionNumber();

		$result     = '';
		$eciCharset = null;

		// While still another segment to read...
		while($bits->available() >= 4){
			$datamode = $bits->read(4); // mode is encoded by 4 bits

			// OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
			if($datamode === Mode::DATA_TERMINATOR){
				break;
			}

			if($datamode === Mode::DATA_ECI){
				// Count doesn't apply to ECI
				$eciCharset = new ECICharset(ECI::parseValue($bits));
			}
			elseif($datamode === Mode::DATA_FNC1_FIRST || $datamode === Mode::DATA_FNC1_SECOND){
				// FNC1 post-processing of the result is not implemented: the indicator is
				// consumed and the following segments are decoded like any other
				continue;
			}
			elseif($datamode === Mode::DATA_STRCTURED_APPEND){

				if($bits->available() < 16){
					throw new RuntimeException('structured append: not enough bits left');
				}

				// sequence number and parity are added to the result metadata later:
				// read the next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
				$symbolSequence = $bits->read(8);
				$parityData     = $bits->read(8);
			}
			elseif($datamode === Mode::DATA_NUMBER){
				$result .= Number::decodeSegment($bits, $versionNumber);
			}
			elseif($datamode === Mode::DATA_ALPHANUM){
				$result .= AlphaNum::decodeSegment($bits, $versionNumber);
			}
			elseif($datamode === Mode::DATA_BYTE){
				$str = Byte::decodeSegment($bits, $versionNumber);

				if($eciCharset !== null){
					$str        = $this->convertFromEciCharset($str, $eciCharset);
					// the charset is used up by the first byte segment that follows the ECI designator
					$eciCharset = null;
				}

				$result .= $str;
			}
			elseif($datamode === Mode::DATA_KANJI){
				$result .= Kanji::decodeSegment($bits, $versionNumber);
			}
			else{
				throw new RuntimeException('invalid data mode');
			}

		}

		return new DecoderResult($bytes, $result, $version, $ecLevel, $symbolSequence, $parityData);
	}

	/**
	 * Converts the content of a byte segment from the character set given via ECI to the internal encoding.
	 *
	 * @param string                               $str        the decoded byte segment
	 * @param \chillerlan\QRCode\Common\ECICharset $eciCharset the charset from the preceding ECI designator
	 *
	 * @return string
	 * @throws \RuntimeException if the ECI charset has no name and the encoding cannot be detected
	 */
	private function convertFromEciCharset(string $str, ECICharset $eciCharset):string{
		$encoding = $eciCharset->getName();

		if($encoding === null){
			// The spec isn't clear on this mode; see section 6.4.5: it does not say which
			// encoding to assume upon decoding. I have seen ISO-8859-1 used as well as
			// Shift_JIS -- without anything like an ECI designator to give a hint.
			$encoding = mb_detect_encoding($str, ['ISO-8859-1', 'SJIS', 'UTF-8']);

			if($encoding === false){
				throw new RuntimeException('could not determine the encoding of an ECI byte segment');
			}
		}

		// mb_convert_encoding() expects the target encoding as 2nd and the source encoding as 3rd
		// argument - the segment is encoded in the ECI charset and is converted *from* it
		return mb_convert_encoding($str, \mb_internal_encoding(), $encoding);
	}
335
336
}
337