Passed
Pull Request — master (#263)
by Sergei
04:53 queued 51s
created

SqlTokenizer   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 307
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 2
Bugs 0 Features 0
Metric Value
wmc 17
eloc 190
c 2
b 0
f 0
dl 0
loc 307
ccs 198
cts 198
cp 1
rs 10

6 Methods

Rating   Name   Duplication   Size   Complexity  
A isOperator() 0 30 1
A isStringLiteral() 0 19 4
A isIdentifier() 0 26 6
B isKeyword() 0 138 2
A isWhitespace() 0 5 1
A isComment() 0 13 3
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Yiisoft\Db\Sqlite;
6
7
use function mb_strtoupper;
8
use function strtr;
9
10
/**
 * Tokenizer for the SQLite dialect: breaks an SQL statement into individual tokens.
 *
 * Its purpose is to extract `CHECK` constraint details from the SQL of a `CREATE TABLE` statement.
 *
 * @link http://www.sqlite.org/draft/tokenreq.html
 * @link https://sqlite.org/lang.html
 */
final class SqlTokenizer extends AbstractTokenizer
{
    /**
     * Single characters recognised as whitespace, stored as array keys so membership is one `isset()` check.
     */
    private const WHITESPACES = ["\f" => true, "\n" => true, "\r" => true, "\t" => true, ' ' => true];

    /**
     * Two-character comment openers mapped to the sequence that ends the comment they start.
     */
    private const COMMENT_TERMINATORS = ['--' => "\n", '/*' => '*/'];

    /**
     * Opening identifier quote mapped to the matching closing quote.
     */
    private const IDENTIFIER_QUOTES = ['"' => '"', '[' => ']', '`' => '`'];

    /**
     * Upper-cased words recognised as keywords, stored as array keys so membership is one `isset()` check.
     */
    private const KEYWORDS = [
        'ABORT' => true,
        'ACTION' => true,
        'ADD' => true,
        'AFTER' => true,
        'ALL' => true,
        'ALTER' => true,
        'ANALYZE' => true,
        'AND' => true,
        'AS' => true,
        'ASC' => true,
        'ATTACH' => true,
        'AUTOINCREMENT' => true,
        'BEFORE' => true,
        'BEGIN' => true,
        'BETWEEN' => true,
        'BY' => true,
        'CASCADE' => true,
        'CASE' => true,
        'CAST' => true,
        'CHECK' => true,
        'COLLATE' => true,
        'COLUMN' => true,
        'COMMIT' => true,
        'CONFLICT' => true,
        'CONSTRAINT' => true,
        'CREATE' => true,
        'CROSS' => true,
        'CURRENT_DATE' => true,
        'CURRENT_TIME' => true,
        'CURRENT_TIMESTAMP' => true,
        'DATABASE' => true,
        'DEFAULT' => true,
        'DEFERRABLE' => true,
        'DEFERRED' => true,
        'DELETE' => true,
        'DESC' => true,
        'DETACH' => true,
        'DISTINCT' => true,
        'DROP' => true,
        'EACH' => true,
        'ELSE' => true,
        'END' => true,
        'ESCAPE' => true,
        'EXCEPT' => true,
        'EXCLUSIVE' => true,
        'EXISTS' => true,
        'EXPLAIN' => true,
        'FAIL' => true,
        'FOR' => true,
        'FOREIGN' => true,
        'FROM' => true,
        'FULL' => true,
        'GLOB' => true,
        'GROUP' => true,
        'HAVING' => true,
        'IF' => true,
        'IGNORE' => true,
        'IMMEDIATE' => true,
        'IN' => true,
        'INDEX' => true,
        'INDEXED' => true,
        'INITIALLY' => true,
        'INNER' => true,
        'INSERT' => true,
        'INSTEAD' => true,
        'INTERSECT' => true,
        'INTO' => true,
        'IS' => true,
        'ISNULL' => true,
        'JOIN' => true,
        'KEY' => true,
        'LEFT' => true,
        'LIKE' => true,
        'LIMIT' => true,
        'MATCH' => true,
        'NATURAL' => true,
        'NO' => true,
        'NOT' => true,
        'NOTNULL' => true,
        'NULL' => true,
        'OF' => true,
        'OFFSET' => true,
        'ON' => true,
        'OR' => true,
        'ORDER' => true,
        'OUTER' => true,
        'PLAN' => true,
        'PRAGMA' => true,
        'PRIMARY' => true,
        'QUERY' => true,
        'RAISE' => true,
        'RECURSIVE' => true,
        'REFERENCES' => true,
        'REGEXP' => true,
        'REINDEX' => true,
        'RELEASE' => true,
        'RENAME' => true,
        'REPLACE' => true,
        'RESTRICT' => true,
        'RIGHT' => true,
        'ROLLBACK' => true,
        'ROW' => true,
        'SAVEPOINT' => true,
        'SELECT' => true,
        'SET' => true,
        'TABLE' => true,
        'TEMP' => true,
        'TEMPORARY' => true,
        'THEN' => true,
        'TO' => true,
        'TRANSACTION' => true,
        'TRIGGER' => true,
        'UNION' => true,
        'UNIQUE' => true,
        'UPDATE' => true,
        'USING' => true,
        'VACUUM' => true,
        'VALUES' => true,
        'VIEW' => true,
        'VIRTUAL' => true,
        'WHEN' => true,
        'WHERE' => true,
        'WITH' => true,
        'WITHOUT' => true,
    ];

    /**
     * Checks whether the character at the current offset is whitespace.
     *
     * `$length` is always set to `1`, the size of the examined (and, on success, matched) string.
     *
     * @param int $length Receives the length of the matched string.
     *
     * @return bool `true` when the current character is a form feed, line feed, carriage return, tab or space.
     */
    protected function isWhitespace(int &$length): bool
    {
        $length = 1;
        $character = $this->substring($length);

        return isset(self::WHITESPACES[$character]);
    }

    /**
     * Checks whether a comment (`-- ...` or `/* ... *​/`) begins at the current offset.
     *
     * On success `$length` receives the length of the matched comment; otherwise it is left at `2`, the size of
     * the opener that was examined.
     *
     * @param int $length Receives the length of the matched string.
     *
     * @return bool `true` when a comment begins at the current offset.
     */
    protected function isComment(int &$length): bool
    {
        $length = 2;
        $opener = $this->substring($length);
        $terminator = self::COMMENT_TERMINATORS[$opener] ?? null;

        if ($terminator === null) {
            return false;
        }

        // The comment extends to the position `indexAfter()` reports for its terminator.
        $length = $this->indexAfter($terminator) - $this->offset;

        return true;
    }

    /**
     * Checks whether an operator begins at the current offset.
     *
     * The decision, and the value written to `$length`, is delegated to `startsWithAnyLongest()`.
     * This implementation never modifies `$content`.
     *
     * @param int $length Receives the length of the matched string.
     * @param string|null $content Optional replacement for the matched string used as the token content.
     *
     * @return bool `true` when an operator begins at the current offset.
     */
    protected function isOperator(int &$length, string|null &$content): bool
    {
        // Kept in a local variable (not a constant) so it can be handed over regardless of how the helper
        // declares its array parameter.
        $candidates = [
            '!=', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', ';',
            '<', '<<', '<=', '<>',
            '=', '==',
            '>', '>=', '>>',
            '|', '||',
            '~',
        ];

        return $this->startsWithAnyLongest($candidates, true, $length);
    }

    /**
     * Checks whether a quoted identifier (`"name"`, `[name]` or `` `name` ``) begins at the current offset.
     *
     * On success `$length` receives the length of the whole quoted identifier and `$content` receives the
     * name without its surrounding quotes, with doubled quotes collapsed for `"` and backtick quoting.
     *
     * @param int $length Receives the length of the matched string.
     * @param string|null $content Receives the unquoted identifier used as the token content.
     *
     * @return bool `true` when a quoted identifier begins at the current offset.
     */
    protected function isIdentifier(int &$length, string|null &$content): bool
    {
        $opening = $this->substring(1);
        $closing = self::IDENTIFIER_QUOTES[$opening] ?? null;

        if ($closing === null) {
            return false;
        }

        $bracketed = $closing === ']';
        $end = $this->offset;

        // A closing quote immediately followed by the same quote is an escaped quote, so scanning continues.
        // Bracketed identifiers stop at the first `]`.
        do {
            $end = $this->indexAfter($closing, $end + 1);
        } while (!$bracketed && $this->substring(1, true, $end) === $closing);

        $length = $end - $this->offset;
        // Skip the opening quote and drop the closing one.
        $content = $this->substring($length - 2, true, $this->offset + 1);

        if (!$bracketed) {
            $content = strtr($content, [$closing . $closing => $closing]);
        }

        return true;
    }

    /**
     * Checks whether a single-quoted string literal begins at the current offset.
     *
     * On success `$length` receives the length of the whole literal including its quotes and `$content`
     * receives the text between the quotes with every `''` collapsed to `'`.
     *
     * @param int $length Receives the length of the matched string.
     * @param string|null $content Receives the unquoted literal used as the token content.
     *
     * @return bool `true` when a string literal begins at the current offset.
     */
    protected function isStringLiteral(int &$length, string|null &$content): bool
    {
        if ($this->substring(1) !== "'") {
            return false;
        }

        $end = $this->offset;

        // A quote immediately followed by another quote is an escaped quote, so scanning continues.
        do {
            $end = $this->indexAfter("'", $end + 1);
        } while ($this->substring(1, true, $end) === "'");

        $length = $end - $this->offset;
        // Skip the opening quote and drop the closing one.
        $quoted = $this->substring($length - 2, true, $this->offset + 1);
        $content = strtr($quoted, ["''" => "'"]);

        return true;
    }

    /**
     * Checks whether the given string is a keyword, ignoring letter case.
     *
     * On success `$content` receives the upper-cased keyword; otherwise it is left untouched.
     *
     * @param string $string The string to match.
     * @param string|null $content Receives the upper-cased keyword used as the token content.
     *
     * @return bool `true` when the given string is a keyword.
     */
    protected function isKeyword(string $string, string|null &$content): bool
    {
        $normalized = mb_strtoupper($string, 'UTF-8');

        if (isset(self::KEYWORDS[$normalized])) {
            $content = $normalized;

            return true;
        }

        return false;
    }
}
327