Passed
Push — v5 ( 93618e...84eb31 )
by smiley
01:52
created

Binarizer::calculateBlackPoints()   F

Complexity

Conditions 17
Paths 358

Size

Total Lines 86
Code Lines 37

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 17
eloc 37
c 1
b 0
f 0
nc 358
nop 5
dl 0
loc 86
rs 2.4083

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * Class Binarizer
4
 *
5
 * @created      17.01.2021
6
 * @author       ZXing Authors
7
 * @author       Smiley <[email protected]>
8
 * @copyright    2021 Smiley
9
 * @license      Apache-2.0
10
 */
11
12
namespace chillerlan\QRCode\Decoder;
13
14
use RuntimeException;
15
use function array_fill, count, max;
16
17
/**
18
 * This class implements a local thresholding algorithm, which while slower than the
19
 * GlobalHistogramBinarizer, is fairly efficient for what it does. It is designed for
20
 * high frequency images of barcodes with black data on white backgrounds. For this application,
21
 * it does a much better job than a global blackpoint with severe shadows and gradients.
22
 * However it tends to produce artifacts on lower frequency images and is therefore not
23
 * a good general purpose binarizer for uses outside ZXing.
24
 *
25
 * This class extends GlobalHistogramBinarizer, using the older histogram approach for 1D readers,
26
 * and the newer local approach for 2D readers. 1D decoding using a per-row histogram is already
27
 * inherently local, and only fails for horizontal gradients. We can revisit that problem later,
28
 * but for now it was not a win to use local blocks for 1D.
29
 *
30
 * This Binarizer is the default for the unit tests and the recommended class for library users.
31
 *
32
 * @author [email protected] (Daniel Switkin)
33
 */
34
final class Binarizer{
35
36
	// This class uses 5x5 blocks to compute local luminance, where each block is 8x8 pixels.
37
	// So this is the smallest dimension in each axis we can accept.
38
	private const BLOCK_SIZE_POWER  = 3;
39
	private const BLOCK_SIZE        = 8; // ...0100...00
40
	private const BLOCK_SIZE_MASK   = 7;   // ...0011...11
41
	private const MINIMUM_DIMENSION = 40;
42
	private const MIN_DYNAMIC_RANGE = 24;
43
44
#	private const LUMINANCE_BITS    = 5;
45
	private const LUMINANCE_SHIFT   = 3;
46
	private const LUMINANCE_BUCKETS = 32;
47
48
	private LuminanceSource $source;
49
50
	public function __construct(LuminanceSource $source){
51
		$this->source = $source;
52
	}
53
54
	/**
55
	 * @throws \RuntimeException
56
	 */
57
	private function estimateBlackPoint(array $buckets):int{
58
		// Find the tallest peak in the histogram.
59
		$numBuckets     = count($buckets);
60
		$maxBucketCount = 0;
61
		$firstPeak      = 0;
62
		$firstPeakSize  = 0;
63
64
		for($x = 0; $x < $numBuckets; $x++){
65
66
			if($buckets[$x] > $firstPeakSize){
67
				$firstPeak     = $x;
68
				$firstPeakSize = $buckets[$x];
69
			}
70
71
			if($buckets[$x] > $maxBucketCount){
72
				$maxBucketCount = $buckets[$x];
73
			}
74
		}
75
76
		// Find the second-tallest peak which is somewhat far from the tallest peak.
77
		$secondPeak      = 0;
78
		$secondPeakScore = 0;
79
80
		for($x = 0; $x < $numBuckets; $x++){
81
			$distanceToBiggest = $x - $firstPeak;
82
			// Encourage more distant second peaks by multiplying by square of distance.
83
			$score = $buckets[$x] * $distanceToBiggest * $distanceToBiggest;
84
85
			if($score > $secondPeakScore){
86
				$secondPeak      = $x;
87
				$secondPeakScore = $score;
88
			}
89
		}
90
91
		// Make sure firstPeak corresponds to the black peak.
92
		if($firstPeak > $secondPeak){
93
			$temp       = $firstPeak;
94
			$firstPeak  = $secondPeak;
95
			$secondPeak = $temp;
96
		}
97
98
		// If there is too little contrast in the image to pick a meaningful black point, throw rather
99
		// than waste time trying to decode the image, and risk false positives.
100
		if($secondPeak - $firstPeak <= $numBuckets / 16){
101
			throw new RuntimeException('no meaningful dark point found');
102
		}
103
104
		// Find a valley between them that is low and closer to the white peak.
105
		$bestValley      = $secondPeak - 1;
106
		$bestValleyScore = -1;
107
108
		for($x = $secondPeak - 1; $x > $firstPeak; $x--){
109
			$fromFirst = $x - $firstPeak;
110
			$score     = $fromFirst * $fromFirst * ($secondPeak - $x) * ($maxBucketCount - $buckets[$x]);
111
112
			if($score > $bestValleyScore){
113
				$bestValley      = $x;
114
				$bestValleyScore = $score;
115
			}
116
		}
117
118
		return $bestValley << self::LUMINANCE_SHIFT;
119
	}
120
121
	/**
122
	 * Calculates the final BitMatrix once for all requests. This could be called once from the
123
	 * constructor instead, but there are some advantages to doing it lazily, such as making
124
	 * profiling easier, and not doing heavy lifting when callers don't expect it.
125
	 *
126
	 * Converts a 2D array of luminance data to 1 bit data. As above, assume this method is expensive
127
	 * and do not call it repeatedly. This method is intended for decoding 2D barcodes and may or
128
	 * may not apply sharpening. Therefore, a row from this matrix may not be identical to one
129
	 * fetched using getBlackRow(), so don't mix and match between them.
130
	 *
131
	 * @return \chillerlan\QRCode\Decoder\BitMatrix The 2D array of bits for the image (true means black).
132
	 */
133
	public function getBlackMatrix():BitMatrix{
134
		$width  = $this->source->getWidth();
135
		$height = $this->source->getHeight();
136
137
		if($width >= self::MINIMUM_DIMENSION && $height >= self::MINIMUM_DIMENSION){
138
			$subWidth = $width >> self::BLOCK_SIZE_POWER;
139
140
			if(($width & self::BLOCK_SIZE_MASK) !== 0){
141
				$subWidth++;
142
			}
143
144
			$subHeight = $height >> self::BLOCK_SIZE_POWER;
145
146
			if(($height & self::BLOCK_SIZE_MASK) !== 0){
147
				$subHeight++;
148
			}
149
150
			return $this->calculateThresholdForBlock($subWidth, $subHeight, $width, $height);
151
		}
152
153
		// If the image is too small, fall back to the global histogram approach.
154
		return $this->getHistogramBlackMatrix($width, $height);
155
	}
156
157
	public function getHistogramBlackMatrix(int $width, int $height):BitMatrix{
158
		$matrix = new BitMatrix(max($width, $height));
159
160
		// Quickly calculates the histogram by sampling four rows from the image. This proved to be
161
		// more robust on the blackbox tests than sampling a diagonal as we used to do.
162
		$buckets = array_fill(0, self::LUMINANCE_BUCKETS, 0);
163
164
		for($y = 1; $y < 5; $y++){
165
			$row             = (int)($height * $y / 5);
166
			$localLuminances = $this->source->getRow($row);
167
			$right           = (int)(($width * 4) / 5);
168
169
			for($x = (int)($width / 5); $x < $right; $x++){
170
				$pixel = $localLuminances[(int)$x] & 0xff;
171
				$buckets[$pixel >> self::LUMINANCE_SHIFT]++;
172
			}
173
		}
174
175
		$blackPoint = $this->estimateBlackPoint($buckets);
176
177
		// We delay reading the entire image luminance until the black point estimation succeeds.
178
		// Although we end up reading four rows twice, it is consistent with our motto of
179
		// "fail quickly" which is necessary for continuous scanning.
180
		$localLuminances = $this->source->getMatrix();
181
182
		for($y = 0; $y < $height; $y++){
183
			$offset = $y * $width;
184
185
			for($x = 0; $x < $width; $x++){
186
				$pixel = (int)($localLuminances[$offset + $x] & 0xff);
187
188
				if($pixel < $blackPoint){
189
					$matrix->set($x, $y);
190
				}
191
			}
192
		}
193
194
		return $matrix;
195
	}
196
197
	/**
198
	 * Calculates a single black point for each block of pixels and saves it away.
199
	 * See the following thread for a discussion of this algorithm:
200
	 *
201
	 * @see http://groups.google.com/group/zxing/browse_thread/thread/d06efa2c35a7ddc0
202
	 */
203
	private function calculateBlackPoints(array $luminances, int $subWidth, int $subHeight, int $width, int $height):array{
204
		$blackPoints = array_fill(0, $subHeight, 0);
205
206
		foreach($blackPoints as $key => $point){
207
			$blackPoints[$key] = array_fill(0, $subWidth, 0);
208
		}
209
210
		for($y = 0; $y < $subHeight; $y++){
211
			$yoffset    = ($y << self::BLOCK_SIZE_POWER);
212
			$maxYOffset = $height - self::BLOCK_SIZE;
213
214
			if($yoffset > $maxYOffset){
215
				$yoffset = $maxYOffset;
216
			}
217
218
			for($x = 0; $x < $subWidth; $x++){
219
				$xoffset    = ($x << self::BLOCK_SIZE_POWER);
220
				$maxXOffset = $width - self::BLOCK_SIZE;
221
222
				if($xoffset > $maxXOffset){
223
					$xoffset = $maxXOffset;
224
				}
225
226
				$sum = 0;
227
				$min = 255;
228
				$max = 0;
229
230
				for($yy = 0, $offset = $yoffset * $width + $xoffset; $yy < self::BLOCK_SIZE; $yy++, $offset += $width){
231
232
					for($xx = 0; $xx < self::BLOCK_SIZE; $xx++){
233
						$pixel = (int)($luminances[(int)($offset + $xx)]) & 0xff;
234
						$sum   += $pixel;
235
						// still looking for good contrast
236
						if($pixel < $min){
237
							$min = $pixel;
238
						}
239
240
						if($pixel > $max){
241
							$max = $pixel;
242
						}
243
					}
244
245
					// short-circuit min/max tests once dynamic range is met
246
					if($max - $min > self::MIN_DYNAMIC_RANGE){
247
						// finish the rest of the rows quickly
248
						for($yy++, $offset += $width; $yy < self::BLOCK_SIZE; $yy++, $offset += $width){
249
							for($xx = 0; $xx < self::BLOCK_SIZE; $xx++){
250
								$sum += $luminances[$offset + $xx] & 0xff;
251
							}
252
						}
253
					}
254
				}
255
256
				// The default estimate is the average of the values in the block.
257
				$average = $sum >> (self::BLOCK_SIZE_POWER * 2);
258
259
				if($max - $min <= self::MIN_DYNAMIC_RANGE){
260
					// If variation within the block is low, assume this is a block with only light or only
261
					// dark pixels. In that case we do not want to use the average, as it would divide this
262
					// low contrast area into black and white pixels, essentially creating data out of noise.
263
					//
264
					// The default assumption is that the block is light/background. Since no estimate for
265
					// the level of dark pixels exists locally, use half the min for the block.
266
					$average = (int)($min / 2);
267
268
					if($y > 0 && $x > 0){
269
						// Correct the "white background" assumption for blocks that have neighbors by comparing
270
						// the pixels in this block to the previously calculated black points. This is based on
271
						// the fact that dark barcode symbology is always surrounded by some amount of light
272
						// background for which reasonable black point estimates were made. The bp estimated at
273
						// the boundaries is used for the interior.
274
275
						// The (min < bp) is arbitrary but works better than other heuristics that were tried.
276
						$averageNeighborBlackPoint = (int)(($blackPoints[$y - 1][$x] + (2 * $blackPoints[$y][$x - 1]) + $blackPoints[$y - 1][$x - 1]) / 4);
277
278
						if($min < $averageNeighborBlackPoint){
279
							$average = $averageNeighborBlackPoint;
280
						}
281
					}
282
				}
283
284
				$blackPoints[$y][$x] = (int)($average);
285
			}
286
		}
287
288
		return $blackPoints;
289
	}
290
291
	/**
292
	 * For each block in the image, calculate the average black point using a 5x5 grid
293
	 * of the blocks around it. Also handles the corner cases (fractional blocks are computed based
294
	 * on the last pixels in the row/column which are also used in the previous block).
295
	 */
296
	private function calculateThresholdForBlock(
297
		int $subWidth,
298
		int $subHeight,
299
		int $width,
300
		int $height
301
	):BitMatrix{
302
		$matrix      = new BitMatrix(max($width, $height));
303
		$luminances  = $this->source->getMatrix();
304
		$blackPoints = $this->calculateBlackPoints($luminances, $subWidth, $subHeight, $width, $height);
305
306
		for($y = 0; $y < $subHeight; $y++){
307
			$yoffset    = ($y << self::BLOCK_SIZE_POWER);
308
			$maxYOffset = $height - self::BLOCK_SIZE;
309
310
			if($yoffset > $maxYOffset){
311
				$yoffset = $maxYOffset;
312
			}
313
314
			for($x = 0; $x < $subWidth; $x++){
315
				$xoffset    = ($x << self::BLOCK_SIZE_POWER);
316
				$maxXOffset = $width - self::BLOCK_SIZE;
317
318
				if($xoffset > $maxXOffset){
319
					$xoffset = $maxXOffset;
320
				}
321
322
				$left = $this->cap($x, 2, $subWidth - 3);
323
				$top  = $this->cap($y, 2, $subHeight - 3);
324
				$sum  = 0;
325
326
				for($z = -2; $z <= 2; $z++){
327
					$blackRow = $blackPoints[$top + $z];
328
					$sum      += $blackRow[$left - 2] + $blackRow[$left - 1] + $blackRow[$left] + $blackRow[$left + 1] + $blackRow[$left + 2];
329
				}
330
331
				$average = (int)($sum / 25);
332
333
				// Applies a single threshold to a block of pixels.
334
				for($j = 0, $o = $yoffset * $width + $xoffset; $j < self::BLOCK_SIZE; $j++, $o += $width){
335
					for($i = 0; $i < self::BLOCK_SIZE; $i++){
336
						// Comparison needs to be <= so that black == 0 pixels are black even if the threshold is 0.
337
						if(($luminances[$o + $i] & 0xff) <= $average){
338
							$matrix->set($xoffset + $i, $yoffset + $j);
339
						}
340
					}
341
				}
342
			}
343
		}
344
345
		return $matrix;
346
	}
347
348
	private function cap(int $value, int $min, int $max):int{
349
350
		if($value < $min){
351
			return $min;
352
		}
353
354
		if($value > $max){
355
			return $max;
356
		}
357
358
		return $value;
359
	}
360
361
}
362