|
1
|
|
|
<?php |
|
2
|
|
|
/**
 * Class Decoder
 *
 * @created      17.01.2021
 * @author       ZXing Authors
 * @author       Smiley <[email protected]>
 * @copyright    2021 Smiley
 * @license      Apache-2.0
 */
|
11
|
|
|
|
|
12
|
|
|
namespace chillerlan\QRCode\Decoder; |
|
13
|
|
|
|
|
14
|
|
|
use InvalidArgumentException, RuntimeException, Throwable;
use chillerlan\QRCode\Common\{BitBuffer, EccLevel, Mode, ReedSolomonDecoder, Version};
use chillerlan\QRCode\Data\{AlphaNum, Byte, ECI, Kanji, Number};
use chillerlan\QRCode\Detector\Detector;
use function count, array_fill, mb_convert_encoding, mb_detect_encoding, mb_internal_encoding;
|
19
|
|
|
|
|
20
|
|
|
/**
 * The main class which implements QR Code decoding -- as opposed to locating and extracting
 * the QR Code from an image.
 *
 * The pipeline implemented here is: detect the symbol in the luminance source, read the raw
 * codewords from the resulting bit matrix, de-interleave them into data blocks, run Reed-Solomon
 * error correction on each block and finally parse the corrected byte stream into text.
 *
 * @todo Hanzi mode (GB2312 subset indicator = 1) is not implemented yet
 *
 * @author Sean Owen
 */
final class Decoder{

	/**
	 * Detects a QR Code in the given luminance source and decodes it.
	 *
	 * The detected matrix is decoded as-is first. If that fails for any reason, a second
	 * attempt is made on the mirrored matrix; if the mirrored attempt fails as well, the
	 * error of the first (non-mirrored) attempt is re-thrown.
	 *
	 * @param \chillerlan\QRCode\Decoder\LuminanceSourceInterface $source
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult text and bytes encoded within the QR Code
	 * @throws \Throwable if the QR Code cannot be decoded
	 */
	public function decode(LuminanceSourceInterface $source):DecoderResult{
		// detection errors are not caught here: without a matrix there is nothing to retry
		$bitMatrix = (new Detector($source))->detect();

		try{
			// clone the BitMatrix so that the original stays untouched for a mirrored retry
			return $this->decodeMatrix(clone $bitMatrix);
		}
		catch(Throwable $e){

			try{
				// The first reading failed - the QR Code may be mirrored, so we try once more
				// with the mirrored content.
				return $this->decodeMatrix($bitMatrix->setMirror(true)->mirror());
			}
			catch(Throwable $f){
				// the mirrored reading failed too: report the error from the original reading
				throw $e;
			}

		}

	}

	/**
	 * Decodes an already detected bit matrix: reads the codewords, version and format info,
	 * error-corrects every data block and parses the concatenated data codewords.
	 *
	 * @param \chillerlan\QRCode\Decoder\BitMatrix $bitMatrix
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult
	 * @throws \RuntimeException if version or format information is not available
	 */
	private function decodeMatrix(BitMatrix $bitMatrix):DecoderResult{
		// Read raw codewords
		$rawCodewords = $bitMatrix->readCodewords();
		$version      = $bitMatrix->getVersion();
		$formatInfo   = $bitMatrix->getFormatInfo();

		// technically this shouldn't happen as the respective read methods would throw first
		if($version === null || $formatInfo === null){
			throw new RuntimeException('unable to read version or ecc level');
		}

		$eccLevel    = $formatInfo->getErrorCorrectionLevel();
		$resultBytes = [];

		// Separate into data blocks, error-correct each one and
		// append its data codewords to the resulting stream of bytes
		foreach($this->getDataBlocks($rawCodewords, $version, $eccLevel) as [$numDataCodewords, $codewordBytes]){
			$corrected = $this->correctErrors($codewordBytes, $numDataCodewords);

			// only the leading data codewords are of interest, the trailing ecc codewords are dropped
			for($i = 0; $i < $numDataCodewords; $i++){
				$resultBytes[] = $corrected[$i];
			}
		}

		// Decode the contents of that stream of bytes
		return $this->decodeBitStream($resultBytes, $version, $eccLevel);
	}

	/**
	 * When QR Codes use multiple data blocks, they are actually interleaved.
	 * That is, the first byte of data block 1 to n is written, then the second bytes, and so on.
	 * This method separates the data into the original blocks.
	 *
	 * @param array                              $rawCodewords bytes as read directly from the QR Code
	 * @param \chillerlan\QRCode\Common\Version  $version      version of the QR Code
	 * @param \chillerlan\QRCode\Common\EccLevel $eccLevel     error-correction level of the QR Code
	 *
	 * @return array a list of [number of data codewords, codewords of the block (data followed by ecc)],
	 *               "de-interleaved" from the representation in the QR Code
	 * @throws \InvalidArgumentException if the number of raw codewords does not match the version
	 */
	private function getDataBlocks(array $rawCodewords, Version $version, EccLevel $eccLevel):array{

		if(count($rawCodewords) !== $version->getTotalCodewords()){
			throw new InvalidArgumentException('$rawCodewords differ from total codewords for version');
		}

		// Figure out the number and size of data blocks used by this version and error correction level
		[$numEccCodewords, $eccBlocks] = $version->getRSBlocks($eccLevel);

		// Now establish data blocks of the appropriate size and number of data codewords
		$result = [];

		foreach($eccBlocks as [$numEccBlocks, $dataCodewordsPerBlock]){
			for($i = 0; $i < $numEccBlocks; $i++){
				$result[] = [$dataCodewordsPerBlock, array_fill(0, $numEccCodewords + $dataCodewordsPerBlock, 0)];
			}
		}

		$numResultBlocks = count($result);

		// All blocks have the same amount of data, except that the last n
		// (where n may be 0) have 1 more byte. Figure out where these start.
		/** @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset */
		$shorterBlocksTotalCodewords = count($result[0][1]);
		$longerBlocksStartAt         = $numResultBlocks - 1;

		// walk backwards until a block with the size of the first (shorter) block is found
		while($longerBlocksStartAt >= 0 && count($result[$longerBlocksStartAt][1]) !== $shorterBlocksTotalCodewords){
			$longerBlocksStartAt--;
		}

		$longerBlocksStartAt++;

		$shorterBlocksNumDataCodewords = $shorterBlocksTotalCodewords - $numEccCodewords;
		$rawCodewordsOffset            = 0;

		// The last elements of result may be 1 element longer;
		// first fill out as many elements as all of them have
		for($i = 0; $i < $shorterBlocksNumDataCodewords; $i++){
			for($j = 0; $j < $numResultBlocks; $j++){
				$result[$j][1][$i] = $rawCodewords[$rawCodewordsOffset++];
			}
		}

		// Fill out the last data codeword in the longer blocks
		for($j = $longerBlocksStartAt; $j < $numResultBlocks; $j++){
			$result[$j][1][$shorterBlocksNumDataCodewords] = $rawCodewords[$rawCodewordsOffset++];
		}

		// Now add in the error correction codewords; in the longer blocks
		// they are shifted by one because of the additional data codeword
		for($i = $shorterBlocksNumDataCodewords; $i < $shorterBlocksTotalCodewords; $i++){
			for($j = 0; $j < $numResultBlocks; $j++){
				$iOffset = $j < $longerBlocksStartAt ? $i : $i + 1;

				$result[$j][1][$iOffset] = $rawCodewords[$rawCodewordsOffset++];
			}
		}

		return $result;
	}

	/**
	 * Given data and error-correction codewords received, possibly corrupted by errors, attempts to
	 * correct the errors using Reed-Solomon error correction.
	 *
	 * @param array $codewordBytes    the codewords of one block: data codewords followed by ecc codewords
	 * @param int   $numDataCodewords the number of leading data codewords in $codewordBytes
	 *
	 * @return array $codewordBytes with the first $numDataCodewords elements replaced by the decoder output
	 */
	private function correctErrors(array $codewordBytes, int $numDataCodewords):array{
		// First read into an array of ints, masked to 8 bits
		$codewordsInts = [];

		foreach($codewordBytes as $i => $codewordByte){
			$codewordsInts[$i] = $codewordByte & 0xFF;
		}

		$numEccCodewords = count($codewordBytes) - $numDataCodewords;
		$decoded         = (new ReedSolomonDecoder)->decode($codewordsInts, $numEccCodewords);

		// Copy back into array of bytes -- only need to worry about the bytes that were data
		// We don't care about errors in the error-correction codewords
		for($i = 0; $i < $numDataCodewords; $i++){
			$codewordBytes[$i] = $decoded[$i];
		}

		return $codewordBytes;
	}

	/**
	 * Parses the error-corrected data codewords segment by segment into the final text.
	 *
	 * Each segment starts with a 4 bit mode indicator. ECI, FNC1 and structured append indicators
	 * only update the decoder state; number, alphanum, byte and kanji segments append to the text.
	 * An ECI designator applies to the next byte segment only and is reset afterwards.
	 *
	 * @param array                              $bytes   the error-corrected data codewords
	 * @param \chillerlan\QRCode\Common\Version  $version version of the QR Code
	 * @param \chillerlan\QRCode\Common\EccLevel $ecLevel error-correction level of the QR Code
	 *
	 * @return \chillerlan\QRCode\Decoder\DecoderResult
	 * @throws \RuntimeException on an invalid data mode, a truncated structured append header
	 *                           or an undeterminable character encoding
	 */
	private function decodeBitStream(array $bytes, Version $version, EccLevel $ecLevel):DecoderResult{
		$bits           = new BitBuffer($bytes);
		$versionNumber  = $version->getVersionNumber();
		$symbolSequence = -1;
		$parityData     = -1;
		$eciCharset     = null;
		$result         = '';

		// While still another segment to read...
		while($bits->available() >= 4){
			$datamode = $bits->read(4); // mode is encoded by 4 bits

			// a terminator ends the data stream
			if($datamode === Mode::TERMINATOR){
				break;
			}

			if($datamode === Mode::ECI){
				// Count doesn't apply to ECI
				$eciCharset = ECI::parseValue($bits);

				continue;
			}

			if($datamode === Mode::FNC1_FIRST || $datamode === Mode::FNC1_SECOND){
				// @todo FNC1 is currently ignored. According to the spec (sections 6.4.8.1, 6.4.8.2)
				//       a following alphanum segment would need "%%" rendered as "%" and
				//       a single "%" converted to the FNC1 separator 0x1D.
				continue;
			}

			if($datamode === Mode::STRCTURED_APPEND){

				if($bits->available() < 16){
					throw new RuntimeException('structured append: not enough bits left');
				}

				// Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue;
				// sequence number and parity are added to the result metadata below
				$symbolSequence = $bits->read(8);
				$parityData     = $bits->read(8);

				continue;
			}

			// "Normal" QR code modes:
			// @todo Hanzi mode: a 4 bit subset indicator follows the mode indicator, then the character count
			if($datamode === Mode::NUMBER){
				$result .= Number::decodeSegment($bits, $versionNumber);
			}
			elseif($datamode === Mode::ALPHANUM){
				$result .= AlphaNum::decodeSegment($bits, $versionNumber);
			}
			elseif($datamode === Mode::BYTE){
				$str = Byte::decodeSegment($bits, $versionNumber);

				if($eciCharset !== null){
					$encoding = $eciCharset->getName();

					if($encoding === null){
						// The spec isn't clear on this mode; see section 6.4.5: it does not say
						// which encoding to assume upon decoding. ISO-8859-1 has been seen in use
						// as well as Shift_JIS -- without anything like an ECI designator to give a hint.
						$encoding = mb_detect_encoding($str, ['ISO-8859-1', 'SJIS', 'UTF-8']);

						// mb_detect_encoding() returns false if none of the candidates match
						if($encoding === false){
							throw new RuntimeException('could not determine encoding in ECI mode');
						}
					}

					// the designator is only valid for this one segment
					$eciCharset = null;
					// the segment bytes are encoded in the ECI charset: convert *from* it to the internal encoding
					$str        = mb_convert_encoding($str, mb_internal_encoding(), $encoding);
				}

				$result .= $str;
			}
			elseif($datamode === Mode::KANJI){
				$result .= Kanji::decodeSegment($bits, $versionNumber);
			}
			else{
				throw new RuntimeException('invalid data mode');
			}

		}

		return new DecoderResult([
			'rawBytes'                 => $bytes,
			'text'                     => $result,
			'version'                  => $version,
			'eccLevel'                 => $ecLevel,
			'structuredAppendParity'   => $parityData,
			'structuredAppendSequence' => $symbolSequence
		]);
	}

}
|
332
|
|
|
|