Decoder::complianceError()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
nc 1
nop 2
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
c 1
b 0
f 0
1
<?php declare(strict_types=1);
2
3
/**
* @package   s9e\Bencode
* @copyright Copyright (c) The s9e authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
8
namespace s9e\Bencode;
9
10
use ArrayObject;
11
use Throwable;
12
use TypeError;
13
use const PHP_INT_MAX, PHP_INT_MIN;
14
use function is_int, preg_match, str_contains, str_ends_with, strcmp, strlen, strspn, substr, substr_compare;
15
use s9e\Bencode\Exceptions\ComplianceError;
16
use s9e\Bencode\Exceptions\DecodingException;
17
18
class Decoder
{
	/**
	* @var int Length of the bencoded string
	*/
	protected int $len;

	/**
	* @var int Safe rightmost boundary
	*/
	protected int $max;

	/**
	* @var int Position of the cursor while decoding
	*/
	protected int $offset = 0;

	/**
	* Decode a bencoded string into its PHP representation
	*
	* Integers are returned as int, strings as string, lists as array and dictionaries as
	* ArrayObject instances. Exactly one value is decoded, starting at offset 0, and the cursor
	* position is verified afterwards.
	*
	* @param  string $bencoded Bencoded string to decode
	* @return ArrayObject|array|int|string
	*
	* @throws DecodingException if the input is malformed or truncated
	* @throws ComplianceError   if the input is readable but violates the format, e.g. it has
	*                           superfluous content after the decoded value
	*/
	public static function decode(string $bencoded): ArrayObject|array|int|string
	{
		$decoder = new static($bencoded);
		try
		{
			$value = $decoder->decodeAnything();
		}
		catch (TypeError $e)
		{
			// Either turned into a DecodingException or rethrown as-is
			throw static::convertTypeError($e, $decoder->offset);
		}

		$decoder->checkCursorPosition();

		return $value;
	}

	/**
	* Store the input, measure it, compute the safe boundary and reject inputs that are too
	* short to contain a value
	*
	* @param string $bencoded Bencoded string being decoded
	*/
	final protected function __construct(protected readonly string $bencoded)
	{
		$this->len = strlen($bencoded);
		$this->max = $this->getSafeBoundary();

		$this->checkBoundary();
	}

	/**
	* Cast given string as an integer and check for clamping
	*
	* @param  string $string Digits of the integer, optionally preceded by "-"
	* @param  int    $clamp  Value the cast would produce if the number was out of range
	* @return int
	*
	* @throws DecodingException if the number cannot be represented as an int
	*/
	final protected function castInteger(string $string, int $clamp): int
	{
		$value = (int) $string;

		// A legitimate PHP_INT_MAX/PHP_INT_MIN still converts to an int with the unary plus,
		// whereas a number that was clamped by the cast does not
		if ($value === $clamp && !is_int(+$string))
		{
			// The cursor sits right after the terminator: step back over it and the digits
			throw new DecodingException('Integer overflow', $this->offset - 1 - strlen($string));
		}

		return $value;
	}

	/**
	* Ensure the safe boundary leaves room for at least one value
	*
	* @throws DecodingException if the safe boundary is lower than 1. The message and offset
	*                           depend on the first character of the input
	*/
	protected function checkBoundary(): void
	{
		if ($this->max < 1)
		{
			throw match (substr($this->bencoded, 0, 1))
			{
				''       => new DecodingException('Premature end of data', 0),
				'-'      => new DecodingException('Illegal character',     0),
				'e'      => new DecodingException('Illegal character',     0),
				default  => new DecodingException('Premature end of data', $this->len - 1)
			};
		}
	}

	/**
	* Check the cursor's position after decoding is done
	*
	* @throws DecodingException if the cursor went past the end of the input
	*/
	protected function checkCursorPosition(): void
	{
		if ($this->offset === $this->len)
		{
			return;
		}
		if ($this->offset > $this->len)
		{
			throw new DecodingException('Premature end of data', $this->len - 1);
		}

		// The cursor stopped before the end of the input
		$this->complianceError('Superfluous content', $this->offset);
	}

	/**
	* Report a compliance error by throwing a ComplianceError
	*
	* @param string $message Error message
	* @param int    $offset  Offset of the error in the bencoded string
	*
	* @throws ComplianceError
	*/
	protected function complianceError(string $message, int $offset): void
	{
		throw new ComplianceError($message, $offset);
	}

	/**
	* Convert a TypeError caught while decoding into the throwable that should be thrown
	*
	* @param  TypeError $e      Original error
	* @param  int       $offset Position of the cursor when the error was caught
	* @return Throwable         A DecodingException if the error originated from decodeString()
	*                           as declared in this class, or the original error otherwise
	*/
	protected static function convertTypeError(TypeError $e, int $offset): Throwable
	{
		// A type error can occur in decodeString() if the string length exceeds an int
		//
		// The trace can be empty, and frames that belong to plain functions have no "class"
		// or "type" element, so every element is read defensively to avoid emitting warnings
		$frame  = $e->getTrace()[0] ?? [];
		$caller = ($frame['class'] ?? '') . ($frame['type'] ?? '') . ($frame['function'] ?? '');
		if ($caller === __CLASS__ . '->decodeString')
		{
			return new DecodingException('String length overflow', $offset - 1);
		}

		// Return any other error as-is
		return $e;
	}

	/**
	* Decode the value that starts at the cursor, whatever its type
	*
	* The first character selects the decoder: "i" for integers, "d" for dictionaries, "l" for
	* lists, and anything else is handed to the string decoder.
	*
	* @return ArrayObject|array|int|string
	*/
	protected function decodeAnything(): ArrayObject|array|int|string
	{
		return match ($this->bencoded[$this->offset])
		{
			'i'     => $this->decodeInteger(),
			'd'     => $this->decodeDictionary(),
			'l'     => $this->decodeList(),
			default => $this->decodeString()
		};
	}

	/**
	* Decode the dictionary that starts at the cursor
	*
	* Keys that are not strictly greater than the previous key are reported via
	* dictionaryComplianceError().
	*
	* @return ArrayObject Dictionary entries, with the ARRAY_AS_PROPS flag set
	*
	* @throws DecodingException if the safe boundary is reached before the dictionary is closed
	*/
	protected function decodeDictionary(): ArrayObject
	{
		$values  = [];
		$lastKey = '';

		// Skip the leading "d"
		++$this->offset;
		while ($this->offset <= $this->max)
		{
			$c = $this->bencoded[$this->offset];
			if ($c === 'e')
			{
				++$this->offset;

				return new ArrayObject($values, ArrayObject::ARRAY_AS_PROPS);
			}

			// Quickly match the most common keys found in dictionaries
			$key = match ($c)
			{
				'4'     => $this->decodeFastString('4:path',      6, 'path'    ),
				'6'     => $this->decodeFastString('6:length',    8, 'length'  ),
				'8'     => $this->decodeFastString('8:announce', 10, 'announce'),
				'5'     => $this->decodeFastString('5:files',     7, 'files'   ),
				default => $this->decodeString()
			};

			// The first key has nothing to be compared to, which is why $values is tested
			if (strcmp($key, $lastKey) <= 0 && !empty($values))
			{
				$this->dictionaryComplianceError($key, $lastKey);
			}

			// Only read the value if the cursor is still within the safe boundary. If it is
			// not, the loop condition fails next and the exception below is thrown
			if ($this->offset <= $this->max)
			{
				$values[$key] = $this->decodeAnything();
				$lastKey      = $key;
			}
		}

		throw new DecodingException('Premature end of data', $this->len - 1);
	}

	/**
	* Decode a string whose first length digit is already known, trying an exact match first
	*
	* Falls back to decodeString() if the length turns out to have more than one digit.
	*
	* @param  string $match Bencoded string to match
	* @param  int    $len   Length of the bencoded string
	* @param  string $value String value to return if the string matches
	* @return string
	*/
	protected function decodeFastString(string $match, int $len, string $value): string
	{
		if (substr_compare($this->bencoded, $match, $this->offset, $len, false) === 0)
		{
			$this->offset += $len;

			return $value;
		}

		// Same single-digit length but different content: skip the digit and the colon
		if ($this->bencoded[$this->offset + 1] === ':')
		{
			$value = substr($this->bencoded, $this->offset + 2, $len - 2);
			$this->offset += $len;

			return $value;
		}

		return $this->decodeString();
	}

	/**
	* Decode the integer that starts at the cursor
	*
	* A minus sign followed by a zero is reported via complianceError().
	*
	* @return int
	*
	* @throws DecodingException if the digits are invalid or the number overflows an int
	*/
	protected function decodeInteger(): int
	{
		// The first increment skips the leading "i", the second one skips the minus sign
		if ($this->bencoded[++$this->offset] === '-')
		{
			if ($this->bencoded[++$this->offset] === '0')
			{
				$this->complianceError('Illegal character', $this->offset);
			}

			$clamp  = PHP_INT_MIN;
			$string = '-' . $this->readDigits('e');
		}
		else
		{
			$clamp  = PHP_INT_MAX;
			$string = $this->readDigits('e');
		}

		return $this->castInteger($string, $clamp);
	}

	/**
	* Decode the list that starts at the cursor
	*
	* @return array Decoded values, in the order they appear
	*
	* @throws DecodingException if the safe boundary is reached before the list is closed
	*/
	protected function decodeList(): array
	{
		// Skip the leading "l"
		++$this->offset;

		$list = [];
		while ($this->offset <= $this->max)
		{
			if ($this->bencoded[$this->offset] === 'e')
			{
				++$this->offset;

				return $list;
			}

			$list[] = $this->decodeAnything();
		}

		throw new DecodingException('Premature end of data', $this->len - 1);
	}

	/**
	* Decode the string that starts at the cursor
	*
	* The cursor is advanced by the declared length even if fewer bytes are available, in
	* which case the returned string is shorter than declared and the cursor ends up past the
	* end of the input.
	*
	* @return string
	*/
	protected function decodeString(): string
	{
		$len = (int) $this->readDigits(':');
		$string = substr($this->bencoded, $this->offset, $len);

		// convertTypeError() expects a TypeError from this method if the length exceeds an int
		$this->offset += $len;

		return $string;
	}

	/**
	* Report a dictionary key that is a duplicate of, or sorts before, the previous key
	*
	* @param string $key     Key that was just decoded
	* @param string $lastKey Key that preceded it
	*/
	protected function dictionaryComplianceError(string $key, string $lastKey): void
	{
		// Compute the offset of the start of the string used as key
		$offset = $this->offset - strlen(strlen($key) . ':') - strlen($key);

		$msg = ($key === $lastKey) ? 'Duplicate' : 'Out of order';
		$this->complianceError($msg . " dictionary entry '" . $key . "'", $offset);
	}

	/**
	* Create the exception for a character found where a terminator was expected
	*
	* @return DecodingException A ComplianceError if the character at the cursor is a digit,
	*                           a plain DecodingException otherwise
	*/
	protected function digitException(): DecodingException
	{
		// We use the same string as readDigits() purely to save one interned string
		return (str_contains('1463720859', $this->bencoded[$this->offset]))
		     ? new ComplianceError('Illegal character', $this->offset)
		     : new DecodingException('Illegal character', $this->offset);
	}

	/**
	* Return the rightmost boundary to the last safe character that can start a value
	*
	* Will rewind the boundary to skip the rightmost digits, optionally preceded by "i" or "i-"
	*
	* @return int Offset of the last character if the input ends with "e", otherwise the offset
	*             of the character that precedes the skipped characters
	*/
	protected function getSafeBoundary(): int
	{
		if (str_ends_with($this->bencoded, 'e'))
		{
			return $this->len - 1;
		}

		// The pattern can match an empty string, so $m[0] is always set
		preg_match('(i?-?[0-9]*+$)D', $this->bencoded, $m);

		return $this->len - 1 - strlen($m[0]);
	}

	/**
	* Read the digits at the cursor and consume the terminator that follows them
	*
	* A leading "0" is read on its own, which means it must be directly followed by the
	* terminator.
	*
	* @param  string $terminator Character expected right after the digits
	* @return string             Digits that were read, without the terminator
	*
	* @throws DecodingException if there is no digit at the cursor, or if the digits are not
	*                           followed by the terminator
	*/
	protected function readDigits(string $terminator): string
	{
		if ($this->bencoded[$this->offset] === '0')
		{
			++$this->offset;
			$string = '0';
		}
		else
		{
			// Digits sorted by decreasing frequency as observed on a random sample of torrent files
			// which speeds it up on PHP < 8.4
			$spn = strspn($this->bencoded, '1463720859', $this->offset);
			if ($spn === 0)
			{
				throw new DecodingException('Illegal character', $this->offset);
			}
			$string = substr($this->bencoded, $this->offset, $spn);
			$this->offset += $spn;
		}

		if ($this->bencoded[$this->offset] !== $terminator)
		{
			throw $this->digitException();
		}
		++$this->offset;

		return $string;
	}
}