Passed
Push — master ( edc9c1...558a8f )
by Alexander
08:28
created

BaseTokenizer::substring()   A

Complexity

Conditions 6
Paths 10

Size

Total Lines 20
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 6.027

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 6
eloc 10
c 1
b 0
f 0
nc 10
nop 3
dl 0
loc 20
ccs 10
cts 11
cp 0.9091
crap 6.027
rs 9.2222
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Yiisoft\Db\Sqlite\Token;
6
7
/**
8
 * BaseTokenizer splits an SQL query into individual SQL tokens.
9
 *
10
 * It can be used to obtain additional information from SQL code.
11
 *
12
 * Usage example:
13
 *
14
 * ```php
15
 * $tokenizer = new SqlTokenizer("SELECT * FROM user WHERE id = 1");
16
 * $root = $tokenizer->tokenize();
17
 * $sqlTokens = $root->getChildren();
18
 * ```
19
 *
20
 * Tokens are instances of {@see SqlToken}.
21
 */
22
abstract class BaseTokenizer
23
{
24
    /**
25
     * @var string SQL code.
26
     */
27
    private string $sql;
28
29
    /**
30
     * @var int SQL code string length.
31
     */
32
    protected int $length;
33
34
    /**
35
     * @var int SQL code string current offset.
36
     */
37
    protected int $offset;
38
39
    /**
     * @var \SplStack stack of active tokens. It is created anew by every {@see tokenize()} call and is only read
     * while that call is running.
     */
    private \SplStack $tokenStack;
43
44
    /**
45
     * @var SqlToken|null active token. It's usually a top of the token stack.
46
     */
47
    private ?SqlToken $currentToken = null;
48
49
    /**
50
     * @var string[] cached substrings.
51
     */
52
    private array $substrings;
53
54
    /**
55
     * @var string string current buffer value.
56
     */
57
    private string $buffer = '';
58
59
    /**
60
     * @var SqlToken|null resulting token of the last {@see tokenize()} call, `null` before the first call.
61
     */
62
    private ?SqlToken $token = null;
63
64 19
    /**
     * @param string $sql SQL code to be tokenized.
     */
    public function __construct(string $sql)
    {
        $this->sql = $sql;

        // Typed properties have no implicit default value: reading them before an assignment throws an Error.
        // Initialize the scanning state here so that the protected helpers (substring(), indexAfter(),
        // startsWithAnyLongest()) are usable by subclasses even before tokenize() has been called.
        $this->length = \mb_strlen($sql, 'UTF-8');
        $this->offset = 0;
        $this->substrings = [];
    }
68
69
    /**
     * Tokenizes the SQL code and returns a code type token.
     *
     * The scanning state (length, offset, substring cache, buffer and token stack) is reset on every call. The
     * returned root token has the {@see SqlToken::TYPE_CODE} type and initially receives one
     * {@see SqlToken::TYPE_STATEMENT} child that collects the tokens found while scanning.
     *
     * @throws \InvalidArgumentException if a matcher reports a match without providing a positive length.
     *
     * @return SqlToken code type token.
     */
    public function tokenize(): SqlToken
    {
        $this->length = \mb_strlen($this->sql, 'UTF-8');
        $this->offset = 0;
        $this->substrings = [];
        $this->buffer = '';

        // Work with a local, non-nullable reference to the root token; the property itself is nullable.
        $root = (new SqlToken())
            ->type(SqlToken::TYPE_CODE)
            ->content($this->sql);
        $this->token = $root;

        $this->tokenStack = new \SplStack();
        $this->tokenStack->push($root);

        $root[] = (new SqlToken())
            ->type(SqlToken::TYPE_STATEMENT);

        $this->tokenStack->push($root[0]);
        $this->currentToken = $this->tokenStack->top();

        // The matchers receive $length by reference and most of them declare it as a non-nullable int. They are only
        // required to assign it when they report a match, so it must hold an int before the first call; otherwise
        // a matcher that returns false without touching it would make the next call fail with a TypeError.
        $length = 0;

        while (!$this->isEof()) {
            if ($this->isWhitespace($length) || $this->isComment($length)) {
                $this->addTokenFromBuffer();
                $this->advance($length);

                continue;
            }

            if ($this->tokenizeOperator($length) || $this->tokenizeDelimitedString($length)) {
                $this->advance($length);

                continue;
            }

            // Anything that is not whitespace, a comment, an operator or a delimited string is accumulated one
            // character at a time and classified later by addTokenFromBuffer().
            $this->buffer .= $this->substring(1);
            $this->advance(1);
        }

        $this->addTokenFromBuffer();

        if ($root->getHasChildren()) {
            // Offset -1 presumably addresses the last child of the root token. Drop it when it has no children
            // of its own. The instanceof check guards against the offset lookup yielding null.
            $lastChild = $root[-1];

            if ($lastChild instanceof SqlToken && !$lastChild->getHasChildren()) {
                unset($root[-1]);
            }
        }

        return $root;
    }
120
121
    /**
122
     * Returns whether there's a whitespace at the current offset.
123
     *
124
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
125
     *
126
     * @param int|null $length length of the matched string.
127
     *
128
     * @return bool whether there's a whitespace at the current offset.
129
     */
130
    abstract protected function isWhitespace(?int &$length): bool;
131
132
    /**
133
     * Returns whether there's a comment at the current offset.
134
     *
135
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
136
     *
137
     * @param int $length length of the matched string.
138
     *
139
     * @return bool whether there's a comment at the current offset.
140
     */
141
    abstract protected function isComment(int &$length): bool;
142
143
    /**
144
     * Returns whether there's an operator at the current offset.
145
     *
146
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string. It may
147
     * also set `$content` to a string that will be used as a token content.
148
     *
149
     * @param int $length  length of the matched string.
150
     * @param string|null $content optional content instead of the matched string.
151
     *
152
     * @return bool whether there's an operator at the current offset.
153
     */
154
    abstract protected function isOperator(int &$length, ?string &$content): bool;
155
156
    /**
157
     * Returns whether there's an identifier at the current offset.
158
     *
159
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string. It may
160
     * also set `$content` to a string that will be used as a token content.
161
     *
162
     * @param int $length length of the matched string.
163
     * @param string|null $content optional content instead of the matched string.
164
     *
165
     * @return bool whether there's an identifier at the current offset.
166
     */
167
    abstract protected function isIdentifier(int &$length, ?string &$content): bool;
168
169
    /**
170
     * Returns whether there's a string literal at the current offset.
171
     *
172
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string. It may
173
     * also set `$content` to a string that will be used as a token content.
174
     *
175
     * @param int $length  length of the matched string.
176
     * @param string|null $content optional content instead of the matched string.
177
     *
178
     * @return bool whether there's a string literal at the current offset.
179
     */
180
    abstract protected function isStringLiteral(int &$length, ?string &$content): bool;
181
182
    /**
183
     * Returns whether the given string is a keyword.
184
     *
185
     * The method may set `$content` to a string that will be used as a token content.
186
     *
187
     * @param string $string  string to be matched.
188
     * @param string|null $content optional content instead of the matched string.
189
     *
190
     * @return bool whether the given string is a keyword.
191
     */
192
    abstract protected function isKeyword(string $string, ?string &$content): bool;
193
194
    /**
     * Replaces the SQL code to be tokenized.
     *
     * The length, the current offset and the substring cache are reset as well, so that the protected helpers
     * never operate on values that describe the previous SQL code.
     *
     * @param string $sql SQL code.
     */
    public function setSql(string $sql): void
    {
        $this->sql = $sql;
        $this->length = \mb_strlen($sql, 'UTF-8');
        $this->offset = 0;
        $this->substrings = [];
    }
201
202
    /**
     * Checks whether the SQL code at the current offset starts with one of the given strings.
     *
     * When `$with` is a flat list of strings it is sorted by descending length and replaced by a lookup map indexed
     * by string length and then by the string itself (uppercased unless `$caseSensitive` is `true`). A variable that
     * already holds such a map is used as is, which is why passing the same variable again is cheaper.
     *
     * @param string[] $with strings to be tested. The method **will** modify this parameter to speed up lookups.
     * @param bool $caseSensitive whether to perform a case sensitive comparison.
     * @param int|null $length length of the matched string.
     * @param string|null $content matched string.
     *
     * @return bool whether a match is found.
     */
    protected function startsWithAnyLongest(
        array &$with,
        bool $caseSensitive,
        ?int &$length = null,
        ?string &$content = null
    ): bool {
        if ($with === []) {
            return false;
        }

        $isLookupMap = \is_array(\reset($with));

        if (!$isLookupMap) {
            // Longest candidates first, so that the length buckets below are created in descending order.
            \usort(
                $with,
                static fn ($first, $second) => \mb_strlen($second, 'UTF-8') <=> \mb_strlen($first, 'UTF-8')
            );

            $lookup = [];

            foreach ($with as $candidate) {
                $key = $caseSensitive ? $candidate : \mb_strtoupper($candidate, 'UTF-8');
                $lookup[\mb_strlen($candidate, 'UTF-8')][$key] = true;
            }

            $with = $lookup;
        }

        foreach ($with as $candidateLength => $candidates) {
            $content = $this->substring($candidateLength, $caseSensitive);

            if (!isset($candidates[$content])) {
                continue;
            }

            $length = $candidateLength;

            return true;
        }

        return false;
    }
247
248
    /**
     * Returns a part of the SQL code of the given length, starting at the specified offset.
     *
     * Results are memoized in the substring cache, separately for the original and the uppercased variant.
     *
     * @param int $length string length to be returned.
     * @param bool $caseSensitive if it's `false`, the string will be uppercased.
     * @param int|null $offset SQL code offset, defaults to current if `null` is passed.
     *
     * @return string result string, it is empty if the requested range runs past the end of the SQL code.
     */
    protected function substring(int $length, bool $caseSensitive = true, ?int $offset = null): string
    {
        $offset ??= $this->offset;

        if ($offset + $length > $this->length) {
            return '';
        }

        $exactKey = $offset . ',' . $length . ',1';

        // The case-sensitive variant is always cached because the uppercased one is derived from it.
        $this->substrings[$exactKey] ??= \mb_substr($this->sql, $offset, $length, 'UTF-8');

        if ($caseSensitive) {
            return $this->substrings[$exactKey];
        }

        $upperKey = $offset . ',' . $length . ',0';
        $this->substrings[$upperKey] ??= \mb_strtoupper($this->substrings[$exactKey], 'UTF-8');

        return $this->substrings[$upperKey];
    }
278
279
    /**
     * Finds the given string in the SQL code, searching from the specified offset, and returns the index right
     * after its first occurrence.
     *
     * @param string $string string to be found.
     * @param int|null $offset SQL code offset, defaults to current if `null` is passed.
     *
     * @return int index after the given string, or the SQL code length if the string cannot fit or is not found.
     */
    protected function indexAfter(string $string, ?int $offset = null): int
    {
        $offset ??= $this->offset;
        $needleLength = \mb_strlen($string, 'UTF-8');

        // The needle is longer than what is left of the SQL code, so it cannot occur.
        if ($offset + $needleLength > $this->length) {
            return $this->length;
        }

        $position = \mb_strpos($this->sql, $string, $offset, 'UTF-8');

        return $position === false ? $this->length : $position + $needleLength;
    }
307
308
    /**
     * Checks for an identifier or, failing that, a string literal at the current offset and appends a matching
     * token to the children of the current token.
     *
     * Pending buffer content is turned into a token first, so the token order follows the SQL code.
     *
     * @param int $length length of the matched string, set by the matcher.
     *
     * @return bool whether an identifier or a string literal has been found.
     */
    private function tokenizeDelimitedString(int &$length): bool
    {
        if ($this->isIdentifier($length, $content)) {
            $type = SqlToken::TYPE_IDENTIFIER;
        } elseif ($this->isStringLiteral($length, $content)) {
            $type = SqlToken::TYPE_STRING_LITERAL;
        } else {
            return false;
        }

        $this->addTokenFromBuffer();

        // The matcher may supply replacement content; otherwise the matched SQL code itself is used.
        $text = \is_string($content) ? $content : $this->substring($length);
        $start = $this->offset;

        $delimited = (new SqlToken())
            ->type($type)
            ->content($text)
            ->startOffset($start)
            ->endOffset($start + $length);

        $this->currentToken[] = $delimited;

        return true;
    }
336
337
    /**
     * Determines whether there is an operator at the current offset and adds it to the token children.
     *
     * Three operators also change the nesting of tokens:
     *
     * - `(` is added as an operator and followed by a new parenthesis token, which becomes the current token;
     * - `)` leaves the current token and is added as an operator to the token below it on the stack;
     * - `;` is added as an operator, then the current token is left and a new statement token is opened. It is
     *   skipped entirely (still reported as matched) when the current token has no children yet.
     *
     * @param int $length length of the matched string, set by {@see isOperator()}.
     *
     * @return bool whether an operator has been found.
     */
    private function tokenizeOperator(int &$length): bool
    {
        if (!$this->isOperator($length, $content)) {
            return false;
        }

        $this->addTokenFromBuffer();

        $operator = $this->substring($length);
        // isOperator() may supply replacement content; otherwise the matched SQL code itself is used.
        $operatorContent = \is_string($content) ? $content : $operator;

        switch ($operator) {
            case '(':
                $this->appendOperatorToken($operatorContent, $length);
                $this->currentToken[] = (new SqlToken())
                    ->type(SqlToken::TYPE_PARENTHESIS);
                $this->enterLastChild();

                break;

            case ')':
                $this->leaveCurrentToken();
                // The closing parenthesis always uses the literal content, as the original implementation did.
                $this->appendOperatorToken(')', $length);

                break;

            case ';':
                if (!$this->currentToken->getHasChildren()) {
                    break;
                }

                $this->appendOperatorToken($operatorContent, $length);
                $this->leaveCurrentToken();
                $this->currentToken[] = (new SqlToken())
                    ->type(SqlToken::TYPE_STATEMENT);
                $this->enterLastChild();

                break;

            default:
                $this->appendOperatorToken($operatorContent, $length);

                break;
        }

        return true;
    }

    /**
     * Appends an operator token spanning `$length` characters from the current offset to the current token.
     *
     * @param string $content token content.
     * @param int $length length of the matched operator.
     */
    private function appendOperatorToken(string $content, int $length): void
    {
        $this->currentToken[] = (new SqlToken())
            ->type(SqlToken::TYPE_OPERATOR)
            ->content($content)
            ->startOffset($this->offset)
            ->endOffset($this->offset + $length);
    }

    /**
     * Pushes the child at offset -1 of the current token onto the stack and makes it the current token.
     */
    private function enterLastChild(): void
    {
        $this->tokenStack->push($this->currentToken[-1]);
        $this->currentToken = $this->tokenStack->top();
    }

    /**
     * Pops the current token off the stack and makes the token below it the current one.
     */
    private function leaveCurrentToken(): void
    {
        $this->tokenStack->pop();
        $this->currentToken = $this->tokenStack->top();
    }
423
424
    /**
     * Turns the buffered text into a keyword or a plain token, appends it to the children of the current token and
     * empties the buffer. Does nothing when the buffer is empty.
     */
    private function addTokenFromBuffer(): void
    {
        $text = $this->buffer;

        if ($text === '') {
            return;
        }

        $type = $this->isKeyword($text, $content) ? SqlToken::TYPE_KEYWORD : SqlToken::TYPE_TOKEN;

        // The buffer ends at the current offset, so its start lies one buffer length before it.
        $end = $this->offset;
        $start = $end - \mb_strlen($text, 'UTF-8');

        $buffered = (new SqlToken())
            ->type($type)
            ->content(\is_string($content) ? $content : $text)
            ->startOffset($start)
            ->endOffset($end);

        $this->currentToken[] = $buffered;

        $this->buffer = '';
    }
445
446
    /**
     * Moves the current offset forward by the specified length and discards the substring cache.
     *
     * @param int $length number of characters to move forward, must be positive.
     *
     * @throws \InvalidArgumentException if the length is zero or negative.
     */
    private function advance(int $length): void
    {
        if ($length < 1) {
            throw new \InvalidArgumentException('Length must be greater than 0.');
        }

        $this->substrings = [];
        $this->offset += $length;
    }
462
463
    /**
     * Tells whether the current offset has reached or passed the end of the SQL code.
     *
     * @return bool `true` when there is nothing left to scan.
     */
    private function isEof(): bool
    {
        return $this->length <= $this->offset;
    }
472
}
473