1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | /* |
||
6 | * This file is part of the league/commonmark package. |
||
7 | * |
||
8 | * (c) Colin O'Dell <colinodell@gmail.com> |
||
9 | * |
||
10 | * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js) |
||
11 | * - (c) John MacFarlane |
||
12 | * |
||
13 | * For the full copyright and license information, please view the LICENSE |
||
14 | * file that was distributed with this source code. |
||
15 | */ |
||
16 | |||
17 | namespace League\CommonMark\Util; |
||
18 | |||
19 | use League\CommonMark\Exception\UnexpectedEncodingException; |
||
20 | |||
/**
 * Normalizes URIs by percent-encoding unsafe characters while leaving
 * existing, well-formed percent-escapes (`%XX`) untouched.
 *
 * @psalm-immutable
 */
final class UrlEncoder
{
    /**
     * Lookup table indexed by ASCII code point (0-127). Each entry is either the
     * character itself (kept as-is) or its upper-case percent-encoded form.
     */
    private const ENCODE_CACHE = ['%00', '%01', '%02', '%03', '%04', '%05', '%06', '%07', '%08', '%09', '%0A', '%0B', '%0C', '%0D', '%0E', '%0F', '%10', '%11', '%12', '%13', '%14', '%15', '%16', '%17', '%18', '%19', '%1A', '%1B', '%1C', '%1D', '%1E', '%1F', '%20', '!', '%22', '#', '$', '%25', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '%3C', '=', '%3E', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '%5B', '%5C', '%5D', '%5E', '_', '%60', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '%7B', '%7C', '%7D', '~', '%7F'];

    /**
     * Percent-encodes a URI, preserving percent-escapes that are already present.
     *
     * - A `%` followed by two hexadecimal digits is copied through unchanged.
     * - Any other ASCII character is mapped through ENCODE_CACHE (so a stray `%`
     *   becomes `%25`).
     * - Non-ASCII characters are encoded with rawurlencode().
     *
     * @param string $uri The URI to normalize; must be valid UTF-8 (or plain ASCII)
     *
     * @return string The encoded URI
     *
     * @throws UnexpectedEncodingException if $uri is not valid UTF-8
     *
     * @psalm-pure
     */
    public static function unescapeAndEncode(string $uri): string
    {
        // Optimization: if the URL only includes characters we know will be kept as-is, then just return the URL as-is.
        // The D modifier makes `$` match only at the very end of the subject; without it a
        // trailing "\n" would slip through this fast path unencoded.
        if (\preg_match('/^[A-Za-z0-9~!@#$&*()\-_=+;:,.\/?]+$/D', $uri) === 1) {
            return $uri;
        }

        $result = '';

        // Split into individual UTF-8 characters; yields false when the input is not valid UTF-8.
        $chars = \preg_split('//u', $uri, -1, \PREG_SPLIT_NO_EMPTY);

        if (! \is_array($chars) || ! \mb_check_encoding($uri, 'UTF-8')) {
            throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
        }

        $l = \count($chars);
        for ($i = 0; $i < $l; $i++) {
            $code = $chars[$i];

            // Keep an existing percent-escape intact (needs two more characters after the '%').
            if ($code === '%' && $i + 2 < $l) {
                if (\preg_match('/^[0-9a-f]{2}$/Di', $chars[$i + 1] . $chars[$i + 2]) === 1) {
                    $result .= '%' . $chars[$i + 1] . $chars[$i + 2];
                    $i      += 2;
                    continue;
                }
            }

            // ASCII: use the precomputed table.
            if (\ord($code) < 128) {
                $result .= self::ENCODE_CACHE[\ord($code)];
                continue;
            }

            // Multi-byte character: encode every byte.
            $result .= \rawurlencode($code);
        }

        return $result;
    }
}
||
68 |