Passed
Pull Request — master (#506)
by
unknown
02:55
created

UtfString::getCharLength()   B

Complexity

Conditions 7
Paths 12

Size

Total Lines 33
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
eloc 14
nc 12
nop 1
dl 0
loc 33
ccs 15
cts 15
cp 1
crap 7
rs 8.8333
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PhpMyAdmin\SqlParser;
6
7
use ArrayAccess;
8
use Exception;
9
use Stringable;
10
11
use function mb_check_encoding;
12
use function mb_strlen;
13
use function mb_substr;
14
use function ord;
15
use function strlen;
16
use function substr;
17
18
/**
 * Implementation for UTF-8 strings.
 *
 * The subscript operator in PHP, when used with a string, returns a byte and not a character. Because a character
 * in a UTF-8 string may occupy more than one byte, the subscript operator may return an invalid character.
 *
 * Because the lexer relies on the subscript operator, this class had to be implemented.
 *
 * It provides read-only, array-like access to the characters of a UTF-8 string: `$utf[$i]` yields the character
 * at character offset `$i`. Writing to or unsetting an offset throws an exception.
 *
 * In this library, this class should be used to parse UTF-8 queries.
 *
 * @implements ArrayAccess<int, string>
 */
class UtfString implements ArrayAccess, Stringable
{
    /**
     * The raw, multi-byte string.
     *
     * @var string
     */
    public $str = '';

    /**
     * The index of current byte.
     *
     * For ASCII strings, the byte index is equal to the character index.
     *
     * @var int
     */
    public $byteIdx = 0;

    /**
     * The index of current character.
     *
     * For non-ASCII strings, some characters occupy more than one byte and
     * the character index will have a lower value than the byte index.
     *
     * @var int
     */
    public $charIdx = 0;

    /**
     * The length of the string (in bytes).
     *
     * @var int
     */
    public $byteLen = 0;

    /**
     * The length of the string (in characters).
     *
     * @var int
     */
    public $charLen = 0;

    /**
     * Stores the string and computes its length in bytes and in characters.
     *
     * When the string is not valid UTF-8 the character length is set to 0, so every offset is reported as
     * missing by {@see offsetExists()} and {@see offsetGet()}.
     *
     * @param string $str the string
     */
    public function __construct($str)
    {
        $this->str = $str;
        $this->byteLen = mb_strlen($str, '8bit');
        if (! mb_check_encoding($str, 'UTF-8')) {
            $this->charLen = 0;
        } else {
            $this->charLen = mb_strlen($str, 'UTF-8');
        }
    }

    /**
     * Checks if the given offset exists.
     *
     * @param int $offset the offset to be checked
     */
    public function offsetExists($offset): bool
    {
        return ($offset >= 0) && ($offset < $this->charLen);
    }

    /**
     * Gets the character at given offset.
     *
     * The object keeps a cursor ($charIdx and the matching $byteIdx) pointing at the last character that was
     * read. A lookup moves that cursor forwards or backwards by the distance to the requested offset instead of
     * scanning from the start of the string, then reads the character under the cursor.
     *
     * The encoding is always passed explicitly as UTF-8 so that the result does not depend on the value of
     * mb_internal_encoding() and stays consistent with the lengths computed by the constructor.
     *
     * @param int $offset the offset to be returned
     *
     * @return string|null the character at the offset, or null when the offset is out of range
     */
    public function offsetGet($offset): string|null
    {
        if (($offset < 0) || ($offset >= $this->charLen)) {
            return null;
        }

        $delta = $offset - $this->charIdx;

        if ($delta > 0) {
            // Fast forwarding: a UTF-8 character occupies at most 4 bytes, so the next 4 bytes always contain
            // the whole current character; its byte length tells how far the byte cursor has to move.
            while ($delta-- > 0) {
                $this->byteIdx += strlen(mb_substr(substr($this->str, $this->byteIdx, 4), 0, 1, 'UTF-8'));
                ++$this->charIdx;
            }
        } elseif ($delta < 0) {
            // Rewinding: step back byte by byte, skipping continuation bytes (10xxxxxx, i.e. 128..191) until
            // the first byte of the previous character is reached.
            while ($delta++ < 0) {
                do {
                    $byte = ord($this->str[--$this->byteIdx]);
                } while (($byte >= 128) && ($byte < 192));

                --$this->charIdx;
            }
        }

        return mb_substr(substr($this->str, $this->byteIdx, 4), 0, 1, 'UTF-8');
    }

    /**
     * Sets the value of a character.
     *
     * @param int    $offset the offset to be set
     * @param string $value  the value to be set
     *
     * @throws Exception not implemented.
     */
    public function offsetSet($offset, $value): void
    {
        throw new Exception('Not implemented.');
    }

    /**
     * Unsets an index.
     *
     * @param int $offset the value to be unset
     *
     * @throws Exception not implemented.
     */
    public function offsetUnset($offset): void
    {
        throw new Exception('Not implemented.');
    }

    /**
     * Returns the length in characters of the string.
     */
    public function length(): int
    {
        return $this->charLen;
    }

    /**
     * Returns the contained string.
     */
    public function __toString(): string
    {
        return $this->str;
    }
}
172