Passed
Pull Request — master (#263)
by Alexander
13:46
created

SqlTokenizer   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 307
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 2
Bugs 0 Features 0
Metric Value
wmc 17
eloc 190
c 2
b 0
f 0
dl 0
loc 307
ccs 198
cts 198
cp 1
rs 10

6 Methods

Rating   Name   Duplication   Size   Complexity  
A isOperator() 0 30 1
A isStringLiteral() 0 19 4
A isIdentifier() 0 26 6
B isKeyword() 0 138 2
A isWhitespace() 0 5 1
A isComment() 0 13 3
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Yiisoft\Db\Sqlite;
6
7
use function mb_strtoupper;
8
use function strtr;
9
10
/**
11
 * Splits SQLite queries into individual SQL tokens.
12
 *
13
 * It's used to obtain `CHECK` constraint information from the SQL code of a `CREATE TABLE` statement.
14
 *
15
 * @link http://www.sqlite.org/draft/tokenreq.html
16
 * @link https://sqlite.org/lang.html
17
 */
18
final class SqlTokenizer extends AbstractTokenizer
19
{
20
    /**
     * Checks whether the character at the current offset is a whitespace character.
     *
     * Form feed, line feed, carriage return, horizontal tab and space are treated as whitespace.
     *
     * The `$length` parameter is always set to `1`, which is the length of the matched string when this method
     * returns `true`.
     *
     * @param int $length The length of the matched string.
     *
     * @return bool Whether there's a whitespace character at the current offset.
     */
    protected function isWhitespace(int &$length): bool
    {
        $length = 1;
        $char = $this->substring($length);

        return match ($char) {
            "\f", "\n", "\r", "\t", ' ' => true,
            default => false,
        };
    }
35
36
    /**
     * Returns whether there's a comment at the current offset.
     *
     * Two forms are recognized: a line comment opened by two dashes, which is closed by a line feed, and a C-style
     * block comment, which is closed by an asterisk followed by a slash.
     *
     * If this method returns `true`, it sets the `$length` parameter to the length of the matched string, closing
     * marker included. If it returns `false`, `$length` is left at `2`.
     *
     * @param int $length The length of the matched string.
     *
     * @return bool Whether there's a comment at the current offset.
     */
    protected function isComment(int &$length): bool
    {
        $length = 2;
        $opening = $this->substring($length);

        if ($opening !== '--' && $opening !== '/*') {
            return false;
        }

        $terminator = $opening === '--' ? "\n" : '*/';

        // Search for the terminator only after the two-character opening marker. Searching from the current offset
        // would let the asterisk of the opening marker double as the start of the closing marker, so that `/*/`
        // would be taken as a complete block comment.
        $length = $this->indexAfter($terminator, $this->offset + $length) - $this->offset;

        return true;
    }
59
60
    /**
     * Checks whether an operator starts at the current offset.
     *
     * The candidate operators are handed to `startsWithAnyLongest()` together with `$length`, which receives the
     * length of the matched string when this method returns `true`.
     *
     * This implementation never assigns `$content`.
     *
     * @param int $length The length of the matched string.
     * @param string|null $content Optional content instead of the matched string.
     *
     * @return bool Whether there's an operator at the current offset.
     */
    protected function isOperator(int &$length, string|null &$content): bool
    {
        // Grouped by leading character; the element order is the same as in a flat alphabetical list.
        $operatorList = [
            '!=',
            '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', ';',
            '<', '<<', '<=', '<>',
            '=', '==',
            '>', '>=', '>>',
            '|', '||',
            '~',
        ];

        return $this->startsWithAnyLongest($operatorList, true, $length);
    }
103
104
    /**
     * Checks whether a quoted identifier starts at the current offset.
     *
     * An identifier is opened by a double quote, a backtick or a square bracket and closed by the matching
     * character. Inside double-quoted and backtick-quoted identifiers a doubled closing character stands for one
     * literal character; bracketed identifiers end at the first closing bracket.
     *
     * If this method returns `true`, it sets the `$length` parameter to the length of the matched string and
     * `$content` to the text between the delimiters with doubled delimiters collapsed.
     *
     * @param int $length The length of the matched string.
     * @param string|null $content Optional content instead of the matched string.
     *
     * @return bool Whether there's an identifier at the current offset.
     */
    protected function isIdentifier(int &$length, string|null &$content): bool
    {
        $closingByOpening = ['"' => '"', '[' => ']', '`' => '`'];
        $opening = $this->substring(1);

        if (!isset($closingByOpening[$opening])) {
            return false;
        }

        $closing = $closingByOpening[$opening];
        $isBracketed = $closing === ']';
        $end = $this->offset;

        // Keep scanning while the closing character found is immediately followed by the same character,
        // i.e. it is an escaped delimiter. Brackets have no such escape, so the first match ends the scan.
        do {
            $end = $this->indexAfter($closing, $end + 1);
        } while (!$isBracketed && $this->substring(1, true, $end) === $closing);

        $length = $end - $this->offset;
        $inner = $this->substring($length - 2, true, $this->offset + 1);
        $content = $isBracketed ? $inner : strtr($inner, [$closing . $closing => $closing]);

        return true;
    }
143
144
    /**
     * Checks whether a single-quoted string literal starts at the current offset.
     *
     * Inside the literal, two consecutive single quotes stand for one literal single quote.
     *
     * If this method returns `true`, it sets the `$length` parameter to the length of the matched string and
     * `$content` to the text between the quotes with doubled quotes collapsed.
     *
     * @param int $length The length of the matched string.
     * @param string|null $content Optional content instead of the matched string.
     *
     * @return bool Whether there's a string literal at the current offset.
     */
    protected function isStringLiteral(int &$length, string|null &$content): bool
    {
        if ($this->substring(1) !== "'") {
            return false;
        }

        $end = $this->offset;

        // A quote that is directly followed by another quote is an escaped quote, so keep scanning.
        do {
            $end = $this->indexAfter("'", $end + 1);
        } while ($this->substring(1, true, $end) === "'");

        $length = $end - $this->offset;
        $raw = $this->substring($length - 2, true, $this->offset + 1);
        $content = strtr($raw, ["''" => "'"]);

        return true;
    }
176
177
    /**
     * Checks whether the given string is a keyword.
     *
     * The comparison is case-insensitive: the string is upper-cased before it is looked up. On a match, `$content`
     * is set to the upper-cased keyword; otherwise `$content` is left untouched.
     *
     * @param string $string The string to match.
     * @param string|null $content Optional content instead of the matched string.
     *
     * @return bool Whether the given string is a keyword.
     */
    protected function isKeyword(string $string, string|null &$content): bool
    {
        // Keyed by keyword for constant-time lookup; grouped by initial letter.
        $keywordMap = [
            'ABORT' => true, 'ACTION' => true, 'ADD' => true, 'AFTER' => true, 'ALL' => true, 'ALTER' => true,
            'ANALYZE' => true, 'AND' => true, 'AS' => true, 'ASC' => true, 'ATTACH' => true, 'AUTOINCREMENT' => true,
            'BEFORE' => true, 'BEGIN' => true, 'BETWEEN' => true, 'BY' => true,
            'CASCADE' => true, 'CASE' => true, 'CAST' => true, 'CHECK' => true, 'COLLATE' => true, 'COLUMN' => true,
            'COMMIT' => true, 'CONFLICT' => true, 'CONSTRAINT' => true, 'CREATE' => true, 'CROSS' => true,
            'CURRENT_DATE' => true, 'CURRENT_TIME' => true, 'CURRENT_TIMESTAMP' => true,
            'DATABASE' => true, 'DEFAULT' => true, 'DEFERRABLE' => true, 'DEFERRED' => true, 'DELETE' => true,
            'DESC' => true, 'DETACH' => true, 'DISTINCT' => true, 'DROP' => true,
            'EACH' => true, 'ELSE' => true, 'END' => true, 'ESCAPE' => true, 'EXCEPT' => true, 'EXCLUSIVE' => true,
            'EXISTS' => true, 'EXPLAIN' => true,
            'FAIL' => true, 'FOR' => true, 'FOREIGN' => true, 'FROM' => true, 'FULL' => true,
            'GLOB' => true, 'GROUP' => true, 'HAVING' => true,
            'IF' => true, 'IGNORE' => true, 'IMMEDIATE' => true, 'IN' => true, 'INDEX' => true, 'INDEXED' => true,
            'INITIALLY' => true, 'INNER' => true, 'INSERT' => true, 'INSTEAD' => true, 'INTERSECT' => true,
            'INTO' => true, 'IS' => true, 'ISNULL' => true,
            'JOIN' => true, 'KEY' => true, 'LEFT' => true, 'LIKE' => true, 'LIMIT' => true, 'MATCH' => true,
            'NATURAL' => true, 'NO' => true, 'NOT' => true, 'NOTNULL' => true, 'NULL' => true,
            'OF' => true, 'OFFSET' => true, 'ON' => true, 'OR' => true, 'ORDER' => true, 'OUTER' => true,
            'PLAN' => true, 'PRAGMA' => true, 'PRIMARY' => true, 'QUERY' => true,
            'RAISE' => true, 'RECURSIVE' => true, 'REFERENCES' => true, 'REGEXP' => true, 'REINDEX' => true,
            'RELEASE' => true, 'RENAME' => true, 'REPLACE' => true, 'RESTRICT' => true, 'RIGHT' => true,
            'ROLLBACK' => true, 'ROW' => true,
            'SAVEPOINT' => true, 'SELECT' => true, 'SET' => true,
            'TABLE' => true, 'TEMP' => true, 'TEMPORARY' => true, 'THEN' => true, 'TO' => true,
            'TRANSACTION' => true, 'TRIGGER' => true,
            'UNION' => true, 'UNIQUE' => true, 'UPDATE' => true, 'USING' => true,
            'VACUUM' => true, 'VALUES' => true, 'VIEW' => true, 'VIRTUAL' => true,
            'WHEN' => true, 'WHERE' => true, 'WITH' => true, 'WITHOUT' => true,
        ];

        $upper = mb_strtoupper($string, 'UTF-8');

        if (isset($keywordMap[$upper])) {
            $content = $upper;

            return true;
        }

        return false;
    }
326
}
327