UrlEncoder::unescapeAndEncode()   B
last analyzed

Complexity

Conditions 9
Paths 8

Size

Total Lines 35
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 20
CRAP Score 9

Importance

Changes 2
Bugs 0 Features 0
Metric Value
eloc 19
dl 0
loc 35
ccs 20
cts 20
cp 1
rs 8.0555
c 2
b 0
f 0
cc 9
nc 8
nop 1
crap 9
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <[email protected]>
9
 *
10
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
11
 *  - (c) John MacFarlane
12
 *
13
 * For the full copyright and license information, please view the LICENSE
14
 * file that was distributed with this source code.
15
 */
16
17
namespace League\CommonMark\Util;
18
19
use League\CommonMark\Exception\UnexpectedEncodingException;
20
21
/**
 * Normalises URIs by percent-encoding unsafe characters while leaving
 * existing percent-escapes (`%XX`) untouched.
 *
 * @psalm-immutable
 */
final class UrlEncoder
{
    /**
     * Lookup table indexed by ASCII code (0-127).
     *
     * Each entry is either the character itself (it is emitted as-is) or its
     * upper-case percent-encoded form (it must be escaped).
     */
    private const ENCODE_CACHE = ['%00', '%01', '%02', '%03', '%04', '%05', '%06', '%07', '%08', '%09', '%0A', '%0B', '%0C', '%0D', '%0E', '%0F', '%10', '%11', '%12', '%13', '%14', '%15', '%16', '%17', '%18', '%19', '%1A', '%1B', '%1C', '%1D', '%1E', '%1F', '%20', '!', '%22', '#', '$', '%25', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '%3C', '=', '%3E', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '%5B', '%5C', '%5D', '%5E', '_', '%60', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '%7B', '%7C', '%7D', '~', '%7F'];

    /**
     * Percent-encodes every character of $uri that is not safe to emit
     * verbatim, without double-encoding escapes that are already present.
     *
     * - A `%` followed by two hexadecimal digits is copied through unchanged.
     * - Any other ASCII character is mapped through ENCODE_CACHE.
     * - Non-ASCII characters are encoded byte-by-byte with rawurlencode().
     *
     * @param string $uri The URI to normalise; must be valid UTF-8 (or plain ASCII).
     *
     * @return string The encoded URI.
     *
     * @throws UnexpectedEncodingException if $uri is not valid UTF-8.
     *
     * @psalm-pure
     */
    public static function unescapeAndEncode(string $uri): string
    {
        // Optimization: if the URL only includes characters we know will be kept as-is, then just return the URL as-is.
        // The pattern is anchored with \z rather than $, because $ would also match right
        // before a trailing newline and let "abc\n" slip through without being encoded.
        if (\preg_match('/^[A-Za-z0-9~!@#$&*()\-_=+;:,.\/?]+\z/', $uri) === 1) {
            return $uri;
        }

        // Split into individual UTF-8 characters; preg_split() yields false on malformed input.
        $chars = \preg_split('//u', $uri, -1, \PREG_SPLIT_NO_EMPTY);

        if (! \is_array($chars) || ! \mb_check_encoding($uri, 'UTF-8')) {
            throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
        }

        $result = '';
        $l      = \count($chars);

        for ($i = 0; $i < $l; $i++) {
            $code = $chars[$i];

            // Keep an existing percent-escape intact (only possible when two more characters follow).
            if ($code === '%' && $i + 2 < $l) {
                $hex = $chars[$i + 1] . $chars[$i + 2];
                if (\preg_match('/^[0-9a-f]{2}$/i', $hex) === 1) {
                    $result .= '%' . $hex;
                    $i      += 2; // skip the two hex digits that were just consumed

                    continue;
                }
            }

            $ord = \ord($code);
            if ($ord < 128) {
                // Single-byte ASCII character: use the precomputed table.
                $result .= self::ENCODE_CACHE[$ord];

                continue;
            }

            // Multi-byte character: percent-encode each of its bytes.
            $result .= \rawurlencode($code);
        }

        return $result;
    }
}
68