1
|
|
|
<?php |
2
|
|
|
/**
 * Class Decoder
 *
 * @created 17.01.2021
 * @author ZXing Authors
 * @author Smiley <[email protected]>
 * @copyright 2021 Smiley
 * @license Apache-2.0
 */
11
|
|
|
|
12
|
|
|
namespace chillerlan\QRCode\Decoder; |
13
|
|
|
|
14
|
|
|
use Exception, InvalidArgumentException, RuntimeException; |
15
|
|
|
use chillerlan\QRCode\Common\{BitBuffer, EccLevel, ECICharset, Mode, ReedSolomonDecoder, Version}; |
16
|
|
|
use chillerlan\QRCode\Data\{AlphaNum, Byte, ECI, Kanji, Number}; |
17
|
|
|
use chillerlan\QRCode\Detector\Detector; |
18
|
|
|
use function count, array_fill, mb_convert_encoding, mb_detect_encoding; |
19
|
|
|
|
20
|
|
|
/**
 * The main class which implements QR Code decoding -- as opposed to locating and extracting
 * the QR Code from an image.
 *
 * @author Sean Owen
 */
final class Decoder{

	/**
	 * Decodes the QR Code found in the given luminance source.
	 *
	 * The source is binarized and passed through the detector; the detected bit matrix is
	 * then decoded as-is and, if that attempt throws, once more as a mirrored symbol.
	 * A 1 or "true" is taken to mean a black module.
	 *
	 * @param \chillerlan\QRCode\Decoder\LuminanceSource $source
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult text and bytes encoded within the QR Code
	 * @throws \Exception if the QR Code cannot be decoded; when both attempts fail,
	 *                    the exception of the regular (non-mirrored) reading is thrown
	 */
	public function decode(LuminanceSource $source):DecoderResult{
		$matrix    = (new Binarizer($source))->getBlackMatrix();
		$bitMatrix = (new Detector($matrix))->detect();

		$firstException = null;

		try{
			// work on a clone so that the mirrored attempt below starts from an untouched matrix
			return $this->decodeParser(new BitMatrixParser(clone $bitMatrix));
		}
		catch(Exception $e){
			// remember the failure of the regular reading, it is the one reported to the caller
			$firstException = $e;
		}

		try{
			$parser = new BitMatrixParser(clone $bitMatrix);

			// read version and format information mirrored, then mirror the content itself
			$parser->setMirror(true);
			$parser->mirror();

			return $this->decodeParser($parser);
		}
		catch(Exception $e){
			// the mirrored attempt was only a fallback: report the original failure
			throw $firstException ?? $e;
		}

	}

	/**
	 * Reads version, error correction level and codewords from the parser, error-corrects
	 * each data block and decodes the resulting byte stream.
	 *
	 * @param \chillerlan\QRCode\Decoder\BitMatrixParser $parser
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult
	 */
	private function decodeParser(BitMatrixParser $parser):DecoderResult{
		$version  = $parser->readVersion();
		$eccLevel = $parser->readFormatInformation()->getErrorCorrectionLevel();

		// Read the raw codewords and separate them into data blocks
		$rawCodewords = $parser->readCodewords();
		$dataBlocks   = $this->getDataBlocks($rawCodewords, $version, $eccLevel);

		$resultBytes = [];

		// Error-correct and copy data blocks together into a stream of bytes
		foreach($dataBlocks as [$numDataCodewords, $codewordBytes]){
			$corrected = $this->correctErrors($codewordBytes, $numDataCodewords);

			// only the leading data codewords are of interest, the ecc codewords are dropped
			for($i = 0; $i < $numDataCodewords; $i++){
				$resultBytes[] = $corrected[$i];
			}
		}

		// Decode the contents of that stream of bytes
		return $this->decodeBitStream($resultBytes, $version, $eccLevel);
	}

	/**
	 * When QR Codes use multiple data blocks, they are actually interleaved.
	 * That is, the first byte of data block 1 to n is written, then the second bytes, and so on.
	 * This method will separate the data into original blocks.
	 *
	 * Each element of the returned array is a pair [number of data codewords, codewords of the block],
	 * where the codewords of the block are the data codewords followed by the error correction codewords.
	 *
	 * @param array                              $rawCodewords bytes as read directly from the QR Code
	 * @param \chillerlan\QRCode\Common\Version  $version      version of the QR Code
	 * @param \chillerlan\QRCode\Common\EccLevel $eccLevel     error-correction level of the QR Code
	 *
	 * @return array data blocks containing original bytes, "de-interleaved" from representation in the QR Code
	 * @throws \InvalidArgumentException if the codeword count does not match the version
	 *                                   or no block layout is available
	 */
	private function getDataBlocks(array $rawCodewords, Version $version, EccLevel $eccLevel):array{

		if(count($rawCodewords) !== $version->getTotalCodewords()){
			throw new InvalidArgumentException('$rawCodewords differ from total codewords for version');
		}

		// Figure out the number and size of data blocks used by this version and error correction level
		[$numEccCodewords, $eccBlocks] = $version->getRSBlocks($eccLevel);

		// Now establish data blocks of the appropriate size and number of data codewords
		$result = [];

		foreach($eccBlocks as [$numBlocks, $numDataCodewords]){
			for($i = 0; $i < $numBlocks; $i++){
				$result[] = [$numDataCodewords, array_fill(0, $numEccCodewords + $numDataCodewords, 0)];
			}
		}

		$numResultBlocks = count($result);

		if($numResultBlocks === 0){
			throw new InvalidArgumentException('no data blocks found for version and ecc level');
		}

		// All blocks have the same amount of data, except that the last n
		// (where n may be 0) have 1 more byte. Figure out where these start.
		$shorterBlocksTotalCodewords = count($result[0][1]);
		$longerBlocksStartAt         = $numResultBlocks - 1;

		while($longerBlocksStartAt >= 0){

			if(count($result[$longerBlocksStartAt][1]) === $shorterBlocksTotalCodewords){
				break;
			}

			$longerBlocksStartAt--;
		}

		// index of the first longer block (equals $numResultBlocks if there is none)
		$longerBlocksStartAt++;

		$shorterBlocksNumDataCodewords = $shorterBlocksTotalCodewords - $numEccCodewords;
		$rawCodewordsOffset            = 0;

		// The last elements of result may be 1 element longer;
		// first fill out as many elements as all of them have
		for($i = 0; $i < $shorterBlocksNumDataCodewords; $i++){
			for($j = 0; $j < $numResultBlocks; $j++){
				$result[$j][1][$i] = $rawCodewords[$rawCodewordsOffset++];
			}
		}

		// Fill out the last data codeword in the longer blocks
		for($j = $longerBlocksStartAt; $j < $numResultBlocks; $j++){
			$result[$j][1][$shorterBlocksNumDataCodewords] = $rawCodewords[$rawCodewordsOffset++];
		}

		// Now add in the error correction codewords; in the longer blocks
		// they sit one position further back because of the extra data codeword
		for($i = $shorterBlocksNumDataCodewords; $i < $shorterBlocksTotalCodewords; $i++){
			for($j = 0; $j < $numResultBlocks; $j++){
				$iOffset = $j < $longerBlocksStartAt ? $i : $i + 1;

				$result[$j][1][$iOffset] = $rawCodewords[$rawCodewordsOffset++];
			}
		}

		return $result;
	}

	/**
	 * Given data and error-correction codewords received, possibly corrupted by errors, attempts to
	 * correct the errors using Reed-Solomon error correction.
	 *
	 * The given array is not modified (arrays are passed by value); a copy in which the first
	 * $numDataCodewords entries are replaced by the decoder output is returned instead.
	 *
	 * @param array $codewordBytes    data codewords followed by the error correction codewords of one block
	 * @param int   $numDataCodewords number of leading data codewords in $codewordBytes
	 *
	 * @return array the codewords with the data part replaced by the Reed-Solomon decoder output
	 */
	private function correctErrors(array $codewordBytes, int $numDataCodewords):array{
		// First read into an array of ints, masked to the lower 8 bits
		$codewordsInts = [];

		foreach($codewordBytes as $i => $codewordByte){
			$codewordsInts[$i] = $codewordByte & 0xFF;
		}

		// everything that is not a data codeword is an error correction codeword
		$numEccCodewords = count($codewordBytes) - $numDataCodewords;
		$decoded         = (new ReedSolomonDecoder)->decode($codewordsInts, $numEccCodewords);

		// Copy back into array of bytes -- only need to worry about the bytes that were data
		// We don't care about errors in the error-correction codewords
		for($i = 0; $i < $numDataCodewords; $i++){
			$codewordBytes[$i] = $decoded[$i];
		}

		return $codewordBytes;
	}

	/**
	 * Decodes the error-corrected byte stream segment by segment into the result string.
	 *
	 * @param array                              $bytes   the error-corrected data codewords
	 * @param \chillerlan\QRCode\Common\Version  $version version of the QR Code
	 * @param \chillerlan\QRCode\Common\EccLevel $ecLevel error-correction level of the QR Code
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult
	 * @throws \RuntimeException on an invalid data mode, a truncated structured append header
	 *                           or an undeterminable encoding of an ECI segment
	 */
	private function decodeBitStream(array $bytes, Version $version, EccLevel $ecLevel):DecoderResult{
		$bits           = new BitBuffer($bytes);
		$versionNumber  = $version->getVersionNumber();
		$symbolSequence = -1;
		$parityData     = -1;
		$result         = '';
		$eciCharset     = null;

		// While still another segment to read...
		while($bits->available() >= 4){
			$datamode = $bits->read(4); // mode is encoded by 4 bits

			// OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
			if($datamode === Mode::DATA_TERMINATOR){
				break;
			}

			if($datamode === Mode::DATA_ECI){
				// Count doesn't apply to ECI; the charset is applied to the next byte segment
				$eciCharset = new ECICharset(ECI::parseValue($bits));
			}
			elseif($datamode === Mode::DATA_FNC1_FIRST || $datamode === Mode::DATA_FNC1_SECOND){
				// FNC1 is recognized but not acted upon: the adjustments of the parsed result
				// according to the spec (see section 6.4.8.1, 6.4.8.2) are not implemented @todo
				continue;
			}
			elseif($datamode === Mode::DATA_STRCTURED_APPEND){

				if($bits->available() < 16){
					throw new RuntimeException('structured append: not enough bits left');
				}

				// Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue;
				// both values are handed over to the result below
				$symbolSequence = $bits->read(8);
				$parityData     = $bits->read(8);
			}
			// "Normal" QR code modes:
			elseif($datamode === Mode::DATA_NUMBER){
				$result .= Number::decodeSegment($bits, $versionNumber);
			}
			elseif($datamode === Mode::DATA_ALPHANUM){
				$result .= AlphaNum::decodeSegment($bits, $versionNumber);
			}
			elseif($datamode === Mode::DATA_BYTE){
				$str = Byte::decodeSegment($bits, $versionNumber);

				if($eciCharset !== null){
					$encoding = $eciCharset->getName();

					if($encoding === null){
						// No encoding name is available for this ECI value.
						// The spec isn't clear on this mode; see section 6.4.5:
						// it does not say which encoding to assume upon decoding.
						// I have seen ISO-8859-1 used as well as Shift_JIS --
						// without anything like an ECI designator to give a hint.
						$encoding = mb_detect_encoding($str, ['ISO-8859-1', 'SJIS', 'UTF-8']);

						if($encoding === false){
							throw new RuntimeException('could not determine encoding in ECI mode');
						}
					}

					// the charset is consumed by this segment
					$eciCharset = null;

					// the segment bytes are encoded in $encoding: convert FROM it to the internal encoding
					$str = mb_convert_encoding($str, \mb_internal_encoding(), $encoding);
				}

				$result .= $str;
			}
			elseif($datamode === Mode::DATA_KANJI){
				$result .= Kanji::decodeSegment($bits, $versionNumber);
			}
			else{
				// any other mode indicator, e.g. Hanzi (GB2312), which is not implemented here @todo
				throw new RuntimeException('invalid data mode');
			}

		}

		return new DecoderResult($bytes, $result, $version, $ecLevel, $symbolSequence, $parityData);
	}

}
337
|
|
|
|