UrlEncoder::unescapeAndEncode()   B
last analyzed

Complexity

Conditions 8
Paths 8

Size

Total Lines 35
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 20
CRAP Score 8

Importance

Changes 0
Metric Value
cc 8
eloc 19
c 0
b 0
f 0
nc 8
nop 1
dl 0
loc 35
ccs 20
cts 20
cp 1
crap 8
rs 8.4444
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <colinodell@gmail.com>
9
 *
10
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
11
 *  - (c) John MacFarlane
12
 *
13
 * For the full copyright and license information, please view the LICENSE
14
 * file that was distributed with this source code.
15
 */
16
17
namespace League\CommonMark\Util;
18
19
use League\CommonMark\Exception\UnexpectedEncodingException;
20
21
/**
 * Normalizes URLs by percent-encoding unsafe characters while leaving
 * already-valid percent-escapes (`%XX`) untouched.
 *
 * @psalm-immutable
 */
final class UrlEncoder
{
    /**
     * Lookup table indexed by ASCII code point (0-127). Each entry is either
     * the character itself (kept as-is) or its percent-encoded form.
     */
    private const ENCODE_CACHE = ['%00', '%01', '%02', '%03', '%04', '%05', '%06', '%07', '%08', '%09', '%0A', '%0B', '%0C', '%0D', '%0E', '%0F', '%10', '%11', '%12', '%13', '%14', '%15', '%16', '%17', '%18', '%19', '%1A', '%1B', '%1C', '%1D', '%1E', '%1F', '%20', '!', '%22', '#', '$', '%25', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '%3C', '=', '%3E', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '%5B', '%5C', '%5D', '%5E', '_', '%60', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '%7B', '%7C', '%7D', '~', '%7F'];

    /**
     * Percent-encodes the given URI.
     *
     * - An existing `%XX` sequence (two hex digits) is copied through verbatim.
     * - Any other ASCII character is mapped through ENCODE_CACHE.
     * - Any non-ASCII character is encoded with rawurlencode().
     *
     * @param string $uri The URI to encode; must be valid UTF-8 (or plain ASCII)
     *
     * @return string The encoded URI
     *
     * @throws UnexpectedEncodingException if a non-UTF-8-compatible encoding is used
     *
     * @psalm-pure
     */
    public static function unescapeAndEncode(string $uri): string
    {
        // Optimization: if the URL only includes characters we know will be kept as-is, then just return the URL as-is.
        // The D modifier is required: without it `$` also matches just before a trailing "\n",
        // which would let an input such as "abc\n" bypass encoding entirely.
        if (\preg_match('/^[A-Za-z0-9~!@#$&*()\-_=+;:,.\/?]+$/D', $uri) === 1) {
            return $uri;
        }

        if (! \mb_check_encoding($uri, 'UTF-8')) {
            throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
        }

        $result = '';

        // Split into whole UTF-8 characters so multi-byte sequences are never cut in half.
        $chars = \mb_str_split($uri, 1, 'UTF-8');

        $l = \count($chars);
        for ($i = 0; $i < $l; $i++) {
            $code = $chars[$i];

            // A '%' followed by two hex digits is an existing escape: keep it and skip past the digits.
            if ($code === '%' && $i + 2 < $l) {
                if (\preg_match('/^[0-9a-f]{2}$/Di', $chars[$i + 1] . $chars[$i + 2]) === 1) {
                    $result .= '%' . $chars[$i + 1] . $chars[$i + 2];
                    $i      += 2;
                    continue;
                }
            }

            // ord() inspects the first byte only; for a multi-byte character that byte is >= 128.
            $byte = \ord($code);
            if ($byte < 128) {
                $result .= self::ENCODE_CACHE[$byte];
                continue;
            }

            $result .= \rawurlencode($code);
        }

        return $result;
    }
}
70