Passed
Push — Experiments ( 897261...2ed8ce )
by Josh
03:14 queued 01:19
created

Decoder::decodeDictionary()   A

Complexity

Conditions 6
Paths 6

Size

Total Lines 28
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 6

Importance

Changes 9
Bugs 0 Features 0
Metric Value
cc 6
eloc 14
c 9
b 0
f 0
nc 6
nop 0
dl 0
loc 28
ccs 15
cts 15
cp 1
crap 6
rs 9.2222
1
<?php declare(strict_types=1);
2
3
/**
4
* @package   s9e\Bencode
5
* @copyright Copyright (c) The s9e authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\Bencode;
9
10
use ArrayObject;
11
use Throwable;
12
use TypeError;
13
use const PHP_INT_MAX, PHP_INT_MIN;
14
use function is_int, preg_match, str_contains, str_ends_with, strcmp, strlen, strspn, substr, substr_compare;
15
use s9e\Bencode\Exceptions\ComplianceError;
16
use s9e\Bencode\Exceptions\DecodingException;
17
18
class Decoder
19
{
20
	/**
21
	* @var int Length of the bencoded string, in bytes
22
	*/
23
	protected int $len;
24
25
	/**
26
	* @var int Safe rightmost boundary, as computed by getSafeBoundary() in the constructor
27
	*/
28
	protected int $max;
29
30
	/**
31
	* @var int Position of the cursor while decoding, as a byte offset into the bencoded string
32
	*/
33
	protected int $offset = 0;
34
35
	/**
	* Decode a bencoded string and return the corresponding PHP value
	*
	* Dictionaries are returned as ArrayObject instances, lists as arrays, and integers and
	* strings as their scalar counterparts.
	*
	* @param  string $bencoded Bencoded string to decode
	* @return ArrayObject|array|int|string Decoded value
	*/
	public static function decode(string $bencoded): ArrayObject|array|int|string
	{
		$instance = new static($bencoded);
		try
		{
			$decoded = $instance->decodeAnything();
		}
		catch (TypeError $typeError)
		{
			// The error is either converted to a decoding exception or rethrown unchanged
			$converted = static::convertTypeError($typeError, $instance->offset);

			throw $converted;
		}

		// The cursor must sit exactly at the end of the input once the value is decoded
		$instance->checkCursorPosition();

		return $decoded;
	}
54
55
	/**
	* Store the input, measure it, and validate that it can hold at least one value
	*
	* @param string $bencoded Bencoded string being decoded
	*/
	final protected function __construct(protected readonly string $bencoded)
	{
		// The promoted property is already assigned when the body runs
		$this->len = strlen($this->bencoded);
		$this->max = $this->getSafeBoundary();

		// Reject inputs whose safe boundary leaves no room for a value
		$this->checkBoundary();
	}
65
66
	/**
	* Cast given string as an integer and check for clamping
	*
	* @param  string $string Digits to cast, optionally preceded by a minus sign
	* @param  int    $clamp  Value that the cast would produce if the number overflowed
	* @return int
	* @throws DecodingException if the cast result equals $clamp without the string being an int
	*/
	final protected function castInteger(string $string, int $clamp): int
	{
		$integer = (int) $string;
		if ($integer !== $clamp || is_int(+$string))
		{
			return $integer;
		}

		// The cursor is past the terminator; rewind over it and the digits to locate the number
		throw new DecodingException('Integer overflow', $this->offset - 1 - strlen($string));
	}
79
80 108
	/**
	* Ensure the safe boundary leaves room for at least one value
	*
	* When the boundary is lower than 1, the error is selected from the first character of the
	* input.
	*
	* @throws DecodingException if the safe boundary is lower than 1
	*/
	protected function checkBoundary(): void
	{
		if ($this->max >= 1)
		{
			return;
		}

		// substr() returns an empty string on empty input
		$firstChar = substr($this->bencoded, 0, 1);

		throw match ($firstChar)
		{
			''       => new DecodingException('Premature end of data', 0),
			'-', 'e' => new DecodingException('Illegal character',     0),
			default  => new DecodingException('Premature end of data', $this->len - 1)
		};
	}
93
94
	/**
	* Check the cursor's position after decoding is done
	*
	* The cursor is expected to be exactly at the end of the input. A cursor beyond the end means
	* the input was truncated, a cursor before the end means there is extra content.
	*
	* @throws DecodingException if the cursor is past the end of the input
	*/
	protected function checkCursorPosition(): void
	{
		if ($this->offset > $this->len)
		{
			throw new DecodingException('Premature end of data', $this->len - 1);
		}
		if ($this->offset < $this->len)
		{
			$this->complianceError('Superfluous content', $this->offset);
		}
	}
110
111 11
	/**
	* Report input that does not comply with the format
	*
	* NOTE(review): callers keep executing after this call, presumably so that a subclass can
	* override this method with a non-throwing version; confirm before relying on it.
	*
	* @param  string $message Error message
	* @param  int    $offset  Offset of the offending content
	* @throws ComplianceError
	*/
	protected function complianceError(string $message, int $offset): void
	{
		$error = new ComplianceError($message, $offset);

		throw $error;
	}
115
116 3
	/**
	* Convert a TypeError caught while decoding into the exception that should be thrown
	*
	* A TypeError raised from this class's decodeString() is replaced with a DecodingException.
	* Any other TypeError is returned unchanged.
	*
	* @param  TypeError $e      Error caught while decoding
	* @param  int       $offset Position of the cursor when the error was caught
	* @return Throwable         Exception to be thrown by the caller
	*/
	protected static function convertTypeError(TypeError $e, int $offset): Throwable
	{
		// A type error can occur in decodeString() if the string length exceeds an int
		//
		// The trace can be empty, and frames for plain functions have no "class" or "type"
		// element, so every lookup gets a default to avoid warnings on unrelated errors
		$frame  = $e->getTrace()[0] ?? [];
		$caller = ($frame['class'] ?? '') . ($frame['type'] ?? '') . ($frame['function'] ?? '');
		if ($caller === __CLASS__ . '->decodeString')
		{
			return new DecodingException('String length overflow', $offset - 1);
		}

		// Return any other error as-is
		return $e;
	}
129
130 92
	/**
	* Decode the value that starts at the cursor's position
	*
	* The first character selects the decoder: "i" for integers, "d" for dictionaries and "l"
	* for lists. Anything else is handed to the string decoder.
	*
	* @return ArrayObject|array|int|string Decoded value
	*/
	protected function decodeAnything(): ArrayObject|array|int|string
	{
		$type = $this->bencoded[$this->offset];

		return match ($type)
		{
			'i'     => $this->decodeInteger(),
			'd'     => $this->decodeDictionary(),
			'l'     => $this->decodeList(),
			default => $this->decodeString()
		};
	}
140
141 34
	/**
	* Decode the dictionary that starts at the cursor's position
	*
	* Keys are compared with strcmp() against the previous key. A key that is not strictly
	* greater than the previous one is reported as a compliance error.
	*
	* @return ArrayObject Decoded entries, with ArrayObject::ARRAY_AS_PROPS set
	* @throws DecodingException if the safe boundary is reached before the dictionary is closed
	*/
	protected function decodeDictionary(): ArrayObject
	{
		$entries = [];
		$prevKey = '';

		// Skip the "d" that opens the dictionary
		++$this->offset;
		while ($this->offset <= $this->max)
		{
			$key = $this->decodeDictionaryKey();
			if ($key === null)
			{
				// Skip the "e" that closes the dictionary
				++$this->offset;

				return new ArrayObject($entries, ArrayObject::ARRAY_AS_PROPS);
			}

			// The first key has nothing to be compared to
			if ($entries !== [] && strcmp($prevKey, $key) >= 0)
			{
				$this->dictionaryComplianceError($key, $prevKey);
			}

			// A key with no room left for its value means the input is truncated
			if ($this->offset > $this->max)
			{
				break;
			}

			$entries[$key] = $this->decodeAnything();
			$prevKey       = $key;
		}

		throw new DecodingException('Premature end of data', $this->len - 1);
	}
170
171 32
	/**
	* Decode the dictionary key that starts at the cursor's position
	*
	* @return ?string Decoded key, or NULL if the cursor is on an "e", which is left unconsumed
	*/
	final protected function decodeDictionaryKey(): ?string
	{
		$char = $this->bencoded[$this->offset];

		// Quickly match the most common keys found in dictionaries
		return match ($char)
		{
			'4'     => $this->decodeFastString('4:path',      6, 'path'    ),
			'6'     => $this->decodeFastString('6:length',    8, 'length'  ),
			'8'     => $this->decodeFastString('8:announce', 10, 'announce'),
			'5'     => $this->decodeFastString('5:files',     7, 'files'   ),
			'e'     => null,
			default => $this->decodeString()
		};
	}
184
185
	/**
	* Decode a string whose first character matches a well-known bencoded key
	*
	* If the input matches $match at the cursor, $value is returned without extracting anything.
	* Otherwise, if the character after the cursor is a colon, the string has the same length
	* prefix as $match and is extracted directly. Anything else goes through decodeString().
	*
	* @param  string $match Bencoded string to match
	* @param  int    $len   Length of the bencoded string
	* @param  string $value String value to return if the string matches
	* @return string        Decoded string
	*/
	protected function decodeFastString(string $match, int $len, string $value): string
	{
		$start = $this->offset;
		if (substr_compare($this->bencoded, $match, $start, $len, false) !== 0)
		{
			if ($this->bencoded[$start + 1] !== ':')
			{
				// The length prefix is not the single digit we expected
				return $this->decodeString();
			}

			// Same length prefix as the expected string, different content
			$value = substr($this->bencoded, $start + 2, $len - 2);
		}
		$this->offset += $len;

		return $value;
	}
208
209 41
	/**
	* Decode the integer that starts at the cursor's position
	*
	* A minus sign directly followed by a zero is reported as a compliance error.
	*
	* @return int Decoded integer
	*/
	protected function decodeInteger(): int
	{
		// Skip the "i" that opens the integer
		++$this->offset;
		if ($this->bencoded[$this->offset] !== '-')
		{
			return $this->castInteger($this->readDigits('e'), PHP_INT_MAX);
		}

		// Skip the minus sign
		++$this->offset;
		if ($this->bencoded[$this->offset] === '0')
		{
			$this->complianceError('Illegal character', $this->offset);
		}

		return $this->castInteger('-' . $this->readDigits('e'), PHP_INT_MIN);
	}
229
230 19
	/**
	* Decode the list that starts at the cursor's position
	*
	* @return array Decoded values, in the order they appear in the input
	* @throws DecodingException if the safe boundary is reached before the list is closed
	*/
	protected function decodeList(): array
	{
		$items = [];

		// Skip the "l" that opens the list
		++$this->offset;
		while ($this->offset <= $this->max)
		{
			if ($this->bencoded[$this->offset] !== 'e')
			{
				$items[] = $this->decodeAnything();

				continue;
			}

			// Skip the "e" that closes the list
			++$this->offset;

			return $items;
		}

		throw new DecodingException('Premature end of data', $this->len - 1);
	}
249
250 53
	/**
	* Decode the string that starts at the cursor's position
	*
	* The cursor is moved past the declared length even if the input is shorter, in which case
	* the returned string is shorter than declared.
	*
	* @return string Decoded string
	*/
	protected function decodeString(): string
	{
		$length = (int) $this->readDigits(':');
		$start  = $this->offset;

		// If the new offset does not fit in an int, this assignment raises the TypeError that
		// convertTypeError() looks for
		$this->offset += $length;

		return substr($this->bencoded, $start, $length);
	}
258
259 4
	/**
	* Report a dictionary key that is a duplicate of, or sorts before, the previous key
	*
	* Expects the cursor to be right after the offending key.
	*
	* @param string $key     Key that was just decoded
	* @param string $lastKey Key that precedes it in the dictionary
	*/
	protected function dictionaryComplianceError(string $key, string $lastKey): void
	{
		// Compute the offset of the start of the string used as key by rewinding over the
		// key itself and its length prefix, colon included
		$keyLen    = strlen($key);
		$prefixLen = strlen($keyLen . ':');
		$start     = $this->offset - $prefixLen - $keyLen;

		if ($key === $lastKey)
		{
			$problem = 'Duplicate';
		}
		else
		{
			$problem = 'Out of order';
		}
		$this->complianceError($problem . " dictionary entry '" . $key . "'", $start);
	}
267
268 12
	/**
	* Create the exception for an unexpected character found where a terminator should be
	*
	* A digit at the cursor's position produces a ComplianceError, anything else produces a
	* plain DecodingException. readDigits() consumes whole runs of digits unless the run starts
	* with a zero, so a digit found here follows a leading zero.
	*
	* @return DecodingException
	*/
	protected function digitException(): DecodingException
	{
		$char = $this->bencoded[$this->offset];

		// We use the same string as readDigits() purely to save one interned string
		if (str_contains('1463720859', $char))
		{
			return new ComplianceError('Illegal character', $this->offset);
		}

		return new DecodingException('Illegal character', $this->offset);
	}
275
276
	/**
	* Return the rightmost boundary to the last safe character that can start a value
	*
	* If the input ends with an "e", the boundary is the last character. Otherwise the boundary
	* is rewound to skip the rightmost digits, optionally preceded by "i" or "i-".
	*
	* @return int Offset of the boundary, which may be negative
	*/
	protected function getSafeBoundary(): int
	{
		$boundary = $this->len - 1;
		if (!str_ends_with($this->bencoded, 'e'))
		{
			// Measure the trailing digits and their optional prefix
			preg_match('(i?-?[0-9]*+$)D', $this->bencoded, $matches);
			$boundary -= strlen($matches[0] ?? '');
		}

		return $boundary;
	}
292
293 78
	/**
	* Read the digits at the cursor's position and the terminator that follows them
	*
	* A zero is always read on its own. On success, the cursor is moved past the terminator.
	*
	* @param  string $terminator Character expected right after the digits
	* @return string             Digits that were read, without the terminator
	* @throws DecodingException if there are no digits or they are not followed by $terminator
	*/
	protected function readDigits(string $terminator): string
	{
		if ($this->bencoded[$this->offset] === '0')
		{
			// A leading zero is read alone, whatever follows must be the terminator
			$digits = '0';
			$count  = 1;
		}
		else
		{
			// Digits sorted by decreasing frequency as observed on a random sample of torrent files
			// which speeds it up on PHP < 8.4
			$count = strspn($this->bencoded, '1463720859', $this->offset);
			if ($count === 0)
			{
				throw new DecodingException('Illegal character', $this->offset);
			}
			$digits = substr($this->bencoded, $this->offset, $count);
		}
		$this->offset += $count;

		if ($this->bencoded[$this->offset] !== $terminator)
		{
			throw $this->digitException();
		}

		// Skip the terminator
		++$this->offset;

		return $digits;
	}
321
}