Test Failed
Pull Request — master (#82)
by Wilmer
24:21 queued 13:10
created

BaseTokenizer::advance()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 4
dl 0
loc 8
ccs 1
cts 1
cp 1
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Yiisoft\Db\Sqlite;
6
7
use SplStack;
8
use Yiisoft\Db\Exception\InvalidArgumentException;
9
10
use function is_array;
11
use function is_string;
12
use function mb_strlen;
13
use function mb_strpos;
14
use function mb_strtoupper;
15
use function mb_substr;
16
use function reset;
17
use function usort;
18
19
/**
20
 * BaseTokenizer splits an SQL query into individual SQL tokens.
21
 *
22
 * It can be used to obtain additional information from SQL code.
23
 *
24
 * Usage example:
25
 *
26
 * ```php
27
 * $tokenizer = new SqlTokenizer("SELECT * FROM user WHERE id = 1");
28
 * $root = $tokenizer->tokenize();
29
 * $sqlTokens = $root->getChildren();
30
 * ```
31
 *
32
 * Tokens are instances of {@see SqlToken}.
33
 */
34
abstract class BaseTokenizer
35
{
36
    /**
37
     * @var string SQL code.
38
     */
39
    private string $sql;
40
41
    /**
42
     * @var int SQL code string length.
43
     */
44
    protected int $length = 0;
45
46
    /**
47
     * @var int SQL code string current offset.
48
     */
49
    protected int $offset = 0;
50
51
    /**
52
     * @var SplStack of active tokens.
53
     *
54
     * @psalm-var SplStack<SqlToken>
55
     * @psalm-suppress PropertyNotSetInConstructor
56
     */
57
    private SplStack $tokenStack;
58
59
    /**
60
     * @var SqlToken|SqlToken[] active token. It's usually the top of the token stack.
61
     *
62
     * @psalm-suppress PropertyNotSetInConstructor
63
     */
64
    private $currentToken;
65
66
    /**
67
     * @var string[] cached substrings.
68
     */
69
    private array $substrings = [];
70
71
    /**
72
     * @var string string current buffer value.
73
     */
74
    private string $buffer = '';
75
76 22
    /**
77
     * @var SqlToken|null resulting token of a last {@see tokenize()} call.
78 22
     */
79 22
    private ?SqlToken $token = null;
80
81
    /**
     * @param string $sql SQL code that {@see tokenize()} will split into tokens.
     */
    public function __construct(string $sql)
    {
        $this->sql = $sql;
    }
85
86
    /**
     * Tokenizes the SQL code and returns the resulting code type token.
     *
     * Every call starts from scratch: length, offset, substring cache, buffer and token stack are re-initialised
     * before the SQL code is scanned.
     *
     * @throws InvalidArgumentException
     *
     * @return SqlToken code type token.
     */
    public function tokenize(): SqlToken
    {
        $this->length = mb_strlen($this->sql, 'UTF-8');
        $this->offset = 0;
        $this->substrings = [];
        $this->buffer = '';

        // The root code token holds the whole SQL; its first child is the initial statement token.
        $this->token = (new SqlToken())->type(SqlToken::TYPE_CODE)->content($this->sql);
        $this->tokenStack = new SplStack();
        $this->tokenStack->push($this->token);
        $this->token[] = (new SqlToken())->type(SqlToken::TYPE_STATEMENT);
        $this->tokenStack->push($this->token[0]);
        // `currentToken` is a property of this tokenizer (not of SqlToken), so the assignment below is valid.
        /** @var SqlToken */
        $this->currentToken = $this->tokenStack->top();

        $length = 0;

        while (!$this->isEof()) {
            if ($this->isWhitespace($length) || $this->isComment($length)) {
                // Whitespace and comments terminate the word collected in the buffer and are skipped.
                $this->addTokenFromBuffer();
                $step = $length;
            /** @psalm-suppress ConflictingReferenceConstraint */
            } elseif ($this->tokenizeOperator($length) || $this->tokenizeDelimitedString($length)) {
                // The token has already been added by the tokenize*() method, only the offset has to move.
                $step = $length;
            } else {
                // Anything else is collected character by character until a delimiter is met.
                $this->buffer .= $this->substring(1);
                $step = 1;
            }

            $this->advance($step);
        }

        $this->addTokenFromBuffer();

        // A trailing `;` leaves an empty statement token behind; drop it from the result.
        if ($this->token->getHasChildren()) {
            $lastChild = $this->token[-1];

            // The `instanceof` guard makes the getHasChildren() call safe against a `null` child.
            if ($lastChild instanceof SqlToken && !$lastChild->getHasChildren()) {
                unset($this->token[-1]);
            }
        }

        return $this->token;
    }
140
141
    /**
142
     * Returns whether there's a whitespace at the current offset.
143
     *
144
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
145
     *
146
     * @param int $length length of the matched string.
147
     *
148
     * @return bool whether there's a whitespace at the current offset.
149
     */
150
    abstract protected function isWhitespace(int &$length): bool;
151
152
    /**
153
     * Returns whether there's a comment at the current offset.
154
     *
155
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
156
     *
157
     * @param int $length length of the matched string.
158
     *
159
     * @return bool whether there's a comment at the current offset.
160
     */
161
    abstract protected function isComment(int &$length): bool;
162
163
    /**
164
     * Returns whether there's an operator at the current offset.
165
     *
166
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string. It may
167
     * also set `$content` to a string that will be used as a token content.
168
     *
169
     * @param int $length  length of the matched string.
170
     * @param string|null $content optional content instead of the matched string.
171
     *
172
     * @return bool whether there's an operator at the current offset.
173
     */
174
    abstract protected function isOperator(int &$length, ?string &$content): bool;
175
176
    /**
177
     * Returns whether there's an identifier at the current offset.
178
     *
179
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string. It may
180
     * also set `$content` to a string that will be used as a token content.
181
     *
182
     * @param int $length length of the matched string.
183
     * @param string|null $content optional content instead of the matched string.
184
     *
185
     * @return bool whether there's an identifier at the current offset.
186
     */
187
    abstract protected function isIdentifier(int &$length, ?string &$content): bool;
188
189
    /**
190
     * Returns whether there's a string literal at the current offset.
191
     *
192
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string. It may
193
     * also set `$content` to a string that will be used as a token content.
194
     *
195
     * @param int $length  length of the matched string.
196
     * @param string|null $content optional content instead of the matched string.
197
     *
198
     * @return bool whether there's a string literal at the current offset.
199
     */
200
    abstract protected function isStringLiteral(int &$length, ?string &$content): bool;
201
202
    /**
203
     * Returns whether the given string is a keyword.
204
     *
205
     * The method may set `$content` to a string that will be used as a token content.
206
     *
207
     * @param string $string  string to be matched.
208
     * @param string|null $content optional content instead of the matched string.
209
     *
210
     * @return bool whether the given string is a keyword.
211
     */
212
    abstract protected function isKeyword(string $string, ?string &$content): bool;
213
214
    /**
     * Replaces the SQL code to be tokenized.
     *
     * Only the stored SQL string changes here; length, offset, buffer and tokens are re-initialised by the next
     * {@see tokenize()} call.
     *
     * @param string $sql SQL code.
     */
    public function setSql(string $sql): void
    {
        $this->sql = $sql;
    }
221
222
    /**
     * Returns whether any of the given strings equals the SQL code of the same length at the current offset.
     * Longer strings are tested first, so the longest candidate wins.
     *
     * A flat list of strings is converted into a lookup map `[length => [string => true]]` ordered from the longest
     * length to the shortest. If the first element of `$with` is itself an array, `$with` is treated as an already
     * prepared map and is iterated in its given order. `$with` is passed by value, so the caller's array is never
     * modified.
     *
     * @param array $with strings to be tested.
     * @param bool $caseSensitive whether to perform a case sensitive comparison.
     * @param int $length length of the matched string. It is written only when a match is found.
     * @param string|null $content matched string (uppercased when `$caseSensitive` is `false`). It is written only
     * when a match is found.
     *
     * @return bool whether a match is found.
     *
     * @psalm-param array<array-key, string> $with
     */
    protected function startsWithAnyLongest(
        array $with,
        bool $caseSensitive,
        int &$length,
        ?string &$content = null
    ): bool {
        if ($with === []) {
            return false;
        }

        if (!is_array(reset($with))) {
            // Longest strings first, so that e.g. `<=` is preferred over `<`.
            usort(
                $with,
                static fn (string $string1, string $string2): int =>
                    mb_strlen($string2, 'UTF-8') <=> mb_strlen($string1, 'UTF-8')
            );

            $map = [];

            foreach ($with as $string) {
                $key = $caseSensitive ? $string : mb_strtoupper($string, 'UTF-8');
                $map[mb_strlen($string, 'UTF-8')][$key] = true;
            }

            $with = $map;
        }

        /** @psalm-var array<int, array> $with */
        foreach ($with as $testLength => $testValues) {
            // Keep the candidate local: the by-reference outputs must not be clobbered by failed attempts.
            $candidate = $this->substring($testLength, $caseSensitive);

            if (isset($testValues[$candidate])) {
                $length = $testLength;
                $content = $candidate;

                return true;
            }
        }

        return false;
    }
270
271
    /**
     * Returns a piece of the SQL code of the given length starting at the specified offset.
     *
     * Results are memoised per offset/length pair (separately for the original and the uppercased variant) until
     * the cache is reset.
     *
     * @param int $length string length to be returned.
     * @param bool $caseSensitive if it's `false`, the string will be uppercased.
     * @param int|null $offset SQL code offset, defaults to current if `null` is passed.
     *
     * @return string result string, it is empty if the requested range runs past the end of the SQL code.
     */
    protected function substring(int $length, bool $caseSensitive = true, ?int $offset = null): string
    {
        $offset ??= $this->offset;

        if ($offset + $length > $this->length) {
            return '';
        }

        $exactKey = $offset . ',' . $length . ',1';

        if (!isset($this->substrings[$exactKey])) {
            $this->substrings[$exactKey] = mb_substr($this->sql, $offset, $length, 'UTF-8');
        }

        if ($caseSensitive) {
            return $this->substrings[$exactKey];
        }

        $upperKey = $offset . ',' . $length . ',0';

        if (!isset($this->substrings[$upperKey])) {
            // The uppercased variant is derived from the cached original.
            $this->substrings[$upperKey] = mb_strtoupper($this->substrings[$exactKey], 'UTF-8');
        }

        return $this->substrings[$upperKey];
    }
302 22
303
    /**
     * Returns the index right after the first occurrence of the given string in the SQL code, searching from the
     * specified offset.
     *
     * @param string $string string to be found.
     * @param int|null $offset SQL code offset, defaults to current if `null` is passed.
     *
     * @return int index after the given string, or the SQL code length if the string cannot fit or is not found.
     */
    protected function indexAfter(string $string, ?int $offset = null): int
    {
        $offset ??= $this->offset;
        $needleLength = mb_strlen($string, 'UTF-8');

        // The needle cannot fit into the remaining SQL code.
        if ($offset + $needleLength > $this->length) {
            return $this->length;
        }

        $position = mb_strpos($this->sql, $string, $offset, 'UTF-8');

        return $position === false ? $this->length : $position + $needleLength;
    }
331
332 22
    /**
     * Determines whether there is a delimited string (an identifier or a string literal) at the current offset and
     * adds it to the token children.
     *
     * The identifier check runs first; the string literal check is only performed when it fails.
     *
     * @param int $length length of the matched string; set by {@see isIdentifier()} or {@see isStringLiteral()}.
     *
     * @return bool whether a delimited string was found at the current offset.
     */
    private function tokenizeDelimitedString(int &$length): bool
    {
        $content = null;

        if ($this->isIdentifier($length, $content)) {
            $type = SqlToken::TYPE_IDENTIFIER;
        } elseif ($this->isStringLiteral($length, $content)) {
            $type = SqlToken::TYPE_STRING_LITERAL;
        } else {
            return false;
        }

        // Whatever was collected before the delimited string becomes a token of its own.
        $this->addTokenFromBuffer();

        $delimited = (new SqlToken())
            ->type($type)
            ->content(is_string($content) ? $content : $this->substring($length))
            ->startOffset($this->offset)
            ->endOffset($this->offset + $length);

        $this->currentToken[] = $delimited;

        return true;
    }
358
359 22
    /**
     * Determines whether there is an operator at the current offset and adds it to the token children.
     *
     * Besides adding the operator token, three operators also change the token tree:
     * - `(` opens a parenthesis token which becomes the current token;
     * - `)` closes the innermost open nesting level and makes its parent the current token;
     * - `;` ends the current statement and opens a new one (it is ignored while the current statement is empty).
     *
     * @param int $length length of the matched operator; set by {@see isOperator()}.
     *
     * @return bool whether an operator was found at the current offset.
     */
    private function tokenizeOperator(int &$length): bool
    {
        $content = null;

        if (!$this->isOperator($length, $content)) {
            return false;
        }

        // Whatever was collected before the operator becomes a token of its own.
        $this->addTokenFromBuffer();

        switch ($this->substring($length)) {
            case '(':
                $this->currentToken[] = $this->createOperatorToken($length, $content);
                $this->currentToken[] = (new SqlToken())->type(SqlToken::TYPE_PARENTHESIS);

                if ($this->currentToken[-1] !== null) {
                    $this->tokenStack->push($this->currentToken[-1]);
                }

                $this->currentToken = $this->tokenStack->top();

                break;
            case ')':
                // The two bottom stack entries are the root code token and the current statement. They must
                // survive an unbalanced `)`: popping them would leave the stack empty and make a later top()
                // throw a RuntimeException. In that case `)` is simply added to the current token.
                if ($this->tokenStack->count() > 2) {
                    $this->tokenStack->pop();
                    $this->currentToken = $this->tokenStack->top();
                }

                $this->currentToken[] = $this->createOperatorToken($length, ')');

                break;
            case ';':
                // An empty statement is not terminated: repeated `;` do not produce empty statements.
                if ($this->currentToken instanceof SqlToken && !$this->currentToken->getHasChildren()) {
                    break;
                }

                $this->currentToken[] = $this->createOperatorToken($length, $content);
                $this->tokenStack->pop();
                $this->currentToken = $this->tokenStack->top();
                $this->currentToken[] = (new SqlToken())->type(SqlToken::TYPE_STATEMENT);

                if ($this->currentToken[-1] instanceof SqlToken) {
                    $this->tokenStack->push($this->currentToken[-1]);
                }

                $this->currentToken = $this->tokenStack->top();

                break;
            default:
                $this->currentToken[] = $this->createOperatorToken($length, $content);

                break;
        }

        return true;
    }

    /**
     * Builds an operator token covering `$length` characters starting at the current offset.
     *
     * @param int $length length of the operator in the SQL code.
     * @param mixed $content token content; unless it is a string, the matched SQL substring is used instead.
     *
     * @return SqlToken the new operator token.
     */
    private function createOperatorToken(int $length, $content): SqlToken
    {
        return (new SqlToken())
            ->type(SqlToken::TYPE_OPERATOR)
            ->content(is_string($content) ? $content : $this->substring($length))
            ->startOffset($this->offset)
            ->endOffset($this->offset + $length);
    }
434
435
    /**
     * Turns the text collected in the buffer into a keyword or generic token, adds it to the current token's
     * children and empties the buffer. Does nothing when the buffer is empty.
     */
    private function addTokenFromBuffer(): void
    {
        if ($this->buffer === '') {
            return;
        }

        $content = null;
        $type = $this->isKeyword($this->buffer, $content) ? SqlToken::TYPE_KEYWORD : SqlToken::TYPE_TOKEN;

        // The buffered text ends right at the current offset.
        $startOffset = $this->offset - mb_strlen($this->buffer, 'UTF-8');

        $buffered = (new SqlToken())
            ->type($type)
            ->content(is_string($content) ? $content : $this->buffer)
            ->startOffset($startOffset)
            ->endOffset($this->offset);

        $this->currentToken[] = $buffered;
        $this->buffer = '';
    }
454 22
455
    /**
     * Moves the current offset forward by the given number of characters and resets the substring cache.
     *
     * @param int $length number of characters to skip; has to be positive.
     *
     * @throws InvalidArgumentException if `$length` is not greater than 0.
     */
    private function advance(int $length): void
    {
        if ($length < 1) {
            throw new InvalidArgumentException('Length must be greater than 0.');
        }

        // Cached substrings are discarded whenever the offset moves.
        $this->substrings = [];
        $this->offset += $length;
    }
471
472
    /**
     * Tells whether the current offset has reached the end of the SQL code.
     *
     * @return bool `true` once the offset is no longer below the SQL code length.
     */
    private function isEof(): bool
    {
        return !($this->offset < $this->length);
    }
481
}
482