Completed
Pull Request — master (#16756)
by Vladimir
13:05
created

SqlTokenizer::indexAfter()   A

Complexity

Conditions 4
Paths 6

Size

Total Lines 18

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 4.432

Importance

Changes 0
Metric Value
dl 0
loc 18
ccs 7
cts 10
cp 0.7
rs 9.6666
c 0
b 0
f 0
cc 4
nc 6
nop 2
crap 4.432
1
<?php
2
/**
3
 * @link http://www.yiiframework.com/
4
 * @copyright Copyright (c) 2008 Yii Software LLC
5
 * @license http://www.yiiframework.com/license/
6
 */
7
8
namespace yii\db;
9
10
use yii\base\Component;
11
use yii\base\InvalidArgumentException;
12
13
/**
14
 * SqlTokenizer splits an SQL query into individual SQL tokens.
15
 *
16
 * It can be used to obtain additional information from SQL code.
17
 *
18
 * Usage example:
19
 *
20
 * ```php
21
 * $tokenizer = new SqlTokenizer("SELECT * FROM user WHERE id = 1");
22
 * $root = $tokenizer->tokenize();
23
 * $sqlTokens = $root->getChildren();
24
 * ```
25
 *
26
 * Tokens are instances of [[SqlToken]].
27
 *
28
 * @author Sergey Makinen <[email protected]>
29
 * @since 2.0.13
30
 */
31
abstract class SqlTokenizer extends Component
32
{
33
    /**
34
     * @var string SQL code.
35
     */
36
    public $sql;
37
38
    /**
39
     * @var int SQL code string length.
40
     */
41
    protected $length;
42
    /**
43
     * @var int SQL code string current offset.
44
     */
45
    protected $offset;
46
47
    /**
48
     * @var \SplStack stack of active tokens.
49
     */
50
    private $_tokenStack;
51
    /**
52
     * @var SqlToken active token. It is usually the top of the token stack.
53
     */
54
    private $_currentToken;
55
    /**
56
     * @var string[] cached substrings.
57
     */
58
    private $_substrings;
59
    /**
60
     * @var string current buffer value.
61
     */
62
    private $_buffer = '';
63
    /**
64
     * @var SqlToken resulting token of the last [[tokenize()]] call.
65
     */
66
    private $_token;
67
68
69
    /**
     * Creates a tokenizer for the given SQL code.
     * @param string $sql SQL code to be tokenized.
     * @param array $config name-value pairs that will be used to initialize the object properties
     */
    public function __construct($sql, $config = [])
    {
        // The SQL is stored first, so it is already available while the parent
        // constructor processes $config.
        $this->sql = $sql;

        parent::__construct($config);
    }
79
80
    /**
     * Splits [[sql]] into tokens and returns the root of the resulting token tree.
     *
     * Each call resets the tokenizer state (length, offset, substring cache, buffer and
     * token stack) before walking the SQL code from the beginning.
     * @return SqlToken code type token.
     */
    public function tokenize()
    {
        // Reset the scanning state.
        $this->length = mb_strlen($this->sql, 'UTF-8');
        $this->offset = 0;
        $this->_substrings = [];
        $this->_buffer = '';

        // The root "code" token owns a first "statement" token; both sit on the stack
        // and the statement becomes the token that receives new children.
        $this->_token = new SqlToken([
            'type' => SqlToken::TYPE_CODE,
            'content' => $this->sql,
        ]);
        $this->_tokenStack = new \SplStack();
        $this->_tokenStack->push($this->_token);
        $this->_token[] = new SqlToken(['type' => SqlToken::TYPE_STATEMENT]);
        $this->_tokenStack->push($this->_token[0]);
        $this->_currentToken = $this->_tokenStack->top();

        while (!$this->isEof()) {
            if ($this->isWhitespace($length) || $this->isComment($length)) {
                // Whitespace and comments end the buffered word and are skipped.
                $this->addTokenFromBuffer();
                $step = $length;
            } elseif ($this->tokenizeOperator($length) || $this->tokenizeDelimitedString($length)) {
                // The token has already been added by the tokenize*() call.
                $step = $length;
            } else {
                // Anything else is collected character by character into the buffer.
                $this->_buffer .= $this->substring(1);
                $step = 1;
            }

            $this->advance($step);
        }

        // Flush whatever is still buffered at the end of the SQL code.
        $this->addTokenFromBuffer();

        // Drop the last child of the root when it has no children itself
        // (presumably the empty statement opened after a trailing `;`).
        $root = $this->_token;
        if ($root->getHasChildren() && !$root[-1]->getHasChildren()) {
            unset($root[-1]);
        }

        return $root;
    }
121
122
    /**
123
     * Returns whether there's a whitespace at the current offset.
124
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
125
     * @param int $length length of the matched string.
126
     * @return bool whether there's a whitespace at the current offset.
127
     */
128
    abstract protected function isWhitespace(&$length);
129
130
    /**
131
     * Returns whether there's a comment at the current offset.
132
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
133
     * @param int $length length of the matched string.
134
     * @return bool whether there's a comment at the current offset.
135
     */
136
    abstract protected function isComment(&$length);
137
138
    /**
139
     * Returns whether there's an operator at the current offset.
140
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
141
     * It may also set `$content` to a string that will be used as a token content.
142
     * @param int $length length of the matched string.
143
     * @param string $content optional content instead of the matched string.
144
     * @return bool whether there's an operator at the current offset.
145
     */
146
    abstract protected function isOperator(&$length, &$content);
147
148
    /**
149
     * Returns whether there's an identifier at the current offset.
150
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
151
     * It may also set `$content` to a string that will be used as a token content.
152
     * @param int $length length of the matched string.
153
     * @param string $content optional content instead of the matched string.
154
     * @return bool whether there's an identifier at the current offset.
155
     */
156
    abstract protected function isIdentifier(&$length, &$content);
157
158
    /**
159
     * Returns whether there's a string literal at the current offset.
160
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
161
     * It may also set `$content` to a string that will be used as a token content.
162
     * @param int $length length of the matched string.
163
     * @param string $content optional content instead of the matched string.
164
     * @return bool whether there's a string literal at the current offset.
165
     */
166
    abstract protected function isStringLiteral(&$length, &$content);
167
168
    /**
169
     * Returns whether the given string is a keyword.
170
     * The method may set `$content` to a string that will be used as a token content.
171
     * @param string $string string to be matched.
172
     * @param string $content optional content instead of the matched string.
173
     * @return bool whether the given string is a keyword.
174
     */
175
    abstract protected function isKeyword($string, &$content);
176
177
    /**
     * Checks whether the SQL code at the current offset starts with one of the given strings,
     * trying longer strings before shorter ones.
     * @param string[] $with strings to be tested.
     * The method **will** modify this parameter to speed up lookups: a flat list of strings is
     * replaced by a map indexed by string length, whose values are sets of candidate strings.
     * @param bool $caseSensitive whether to perform a case sensitive comparison.
     * @param int|null $length length of the matched string.
     * @param string|null $content matched string.
     * @return bool whether a match is found.
     */
    protected function startsWithAnyLongest(array &$with, $caseSensitive, &$length = null, &$content = null)
    {
        if (empty($with)) {
            return false;
        }

        // A first element that is not an array means $with is still a flat list of strings
        // and has to be converted into the length-indexed lookup map.
        if (!is_array(reset($with))) {
            // Longest strings first, so the map below is iterated from long to short.
            usort($with, function ($left, $right) {
                return mb_strlen($right, 'UTF-8') - mb_strlen($left, 'UTF-8');
            });

            $byLength = [];
            foreach ($with as $candidate) {
                $key = $caseSensitive ? $candidate : mb_strtoupper($candidate, 'UTF-8');
                $byLength[mb_strlen($candidate, 'UTF-8')][$key] = true;
            }
            $with = $byLength;
        }

        foreach ($with as $candidateLength => $candidates) {
            $content = $this->substring($candidateLength, $caseSensitive);
            if (!isset($candidates[$content])) {
                continue;
            }

            $length = $candidateLength;

            return true;
        }

        return false;
    }
212
213
    /**
     * Extracts a part of the SQL code, caching the result for repeated lookups.
     * @param int $length string length to be returned.
     * @param bool $caseSensitive if it's `false`, the string will be uppercased.
     * @param int|null $offset SQL code offset, defaults to current if `null` is passed.
     * @return string result string, it is empty if the requested range exceeds the SQL code.
     */
    protected function substring($length, $caseSensitive = true, $offset = null)
    {
        if ($offset === null) {
            $offset = $this->offset;
        }
        if ($offset + $length > $this->length) {
            return '';
        }

        // Cache keys have the form "offset,length,flag": flag 1 is the text as written,
        // flag 0 is its uppercased variant.
        $keyPrefix = $offset . ',' . $length . ',';
        $exactKey = $keyPrefix . '1';
        if (!isset($this->_substrings[$exactKey])) {
            $this->_substrings[$exactKey] = mb_substr($this->sql, $offset, $length, 'UTF-8');
        }

        if (!$caseSensitive) {
            $upperKey = $keyPrefix . '0';
            if (!isset($this->_substrings[$upperKey])) {
                $this->_substrings[$upperKey] = mb_strtoupper($this->_substrings[$exactKey], 'UTF-8');
            }
        }

        return $this->_substrings[$keyPrefix . (int) $caseSensitive];
    }
239
240
    /**
     * Finds the given string in the SQL code and returns the index right after it.
     * @param string $string string to be found.
     * @param int|null $offset SQL code offset to start searching from, defaults to current if `null` is passed.
     * @return int index after the given string, or the SQL code length if the string
     * does not fit into the remaining code or is not found.
     */
    protected function indexAfter($string, $offset = null)
    {
        if ($offset === null) {
            $offset = $this->offset;
        }

        $needleLength = mb_strlen($string, 'UTF-8');
        if ($offset + $needleLength > $this->length) {
            // The string cannot fit into the remaining SQL code.
            return $this->length;
        }

        $position = mb_strpos($this->sql, $string, $offset, 'UTF-8');

        return $position === false ? $this->length : $position + $needleLength;
    }
264
265
    /**
     * Checks for an identifier or, failing that, a string literal at the current offset
     * and appends it to the current token's children.
     * @param int $length receives the length of the matched string.
     * @return bool whether an identifier or a string literal was found and added.
     */
    private function tokenizeDelimitedString(&$length)
    {
        // Identifiers take precedence; string literals are only tested when no identifier matched.
        if ($this->isIdentifier($length, $content)) {
            $type = SqlToken::TYPE_IDENTIFIER;
        } elseif ($this->isStringLiteral($length, $content)) {
            $type = SqlToken::TYPE_STRING_LITERAL;
        } else {
            return false;
        }

        // Anything buffered so far is a separate token that precedes this one.
        $this->addTokenFromBuffer();

        // Fall back to the raw matched text when the matcher did not supply a content string.
        if (!is_string($content)) {
            $content = $this->substring($length);
        }

        $this->_currentToken[] = new SqlToken([
            'type' => $type,
            'content' => $content,
            'startOffset' => $this->offset,
            'endOffset' => $this->offset + $length,
        ]);

        return true;
    }
287
288
    /**
     * Determines whether there is an operator at the current offset and adds it to the token children.
     *
     * Three operators also change the token tree structure:
     *
     * - `(` is added and then opens a new parenthesis token that becomes the current token;
     * - `)` closes the current nesting level and is added to the token that becomes current;
     * - `;` is added to the current token (unless that token has no children, in which case it is
     *   dropped), closes it and opens a new statement token.
     *
     * A `)` without an open nesting level is added to the current token as a plain operator
     * instead of popping the base tokens pushed by [[tokenize()]].
     * @param int $length receives the length of the matched string.
     * @return bool whether an operator was found at the current offset.
     */
    private function tokenizeOperator(&$length)
    {
        if (!$this->isOperator($length, $content)) {
            return false;
        }

        // Anything buffered so far is a separate token that precedes the operator.
        $this->addTokenFromBuffer();

        $matched = $this->substring($length);
        // The matcher may supply a replacement content; otherwise the raw matched text is used.
        $operatorContent = is_string($content) ? $content : $matched;

        switch ($matched) {
            case '(':
                $this->_currentToken[] = $this->createOperatorToken($operatorContent, $length);
                $this->_currentToken[] = new SqlToken(['type' => SqlToken::TYPE_PARENTHESIS]);
                $this->_tokenStack->push($this->_currentToken[-1]);
                $this->_currentToken = $this->_tokenStack->top();
                break;
            case ')':
                // tokenize() keeps two base entries (code and statement) on the stack. Popping
                // them on an unbalanced `)` would attach later tokens to the wrong parent or
                // make SplStack::top() throw on an empty stack, so only deeper levels are closed.
                if ($this->_tokenStack->count() > 2) {
                    $this->_tokenStack->pop();
                    $this->_currentToken = $this->_tokenStack->top();
                }
                $this->_currentToken[] = $this->createOperatorToken(')', $length);
                break;
            case ';':
                // A `;` in a token without children is skipped entirely.
                if (!$this->_currentToken->getHasChildren()) {
                    break;
                }

                $this->_currentToken[] = $this->createOperatorToken($operatorContent, $length);
                // Close the current token and open a new statement under its parent.
                $this->_tokenStack->pop();
                $this->_currentToken = $this->_tokenStack->top();
                $this->_currentToken[] = new SqlToken(['type' => SqlToken::TYPE_STATEMENT]);
                $this->_tokenStack->push($this->_currentToken[-1]);
                $this->_currentToken = $this->_tokenStack->top();
                break;
            default:
                $this->_currentToken[] = $this->createOperatorToken($operatorContent, $length);
                break;
        }

        return true;
    }

    /**
     * Creates an operator token that starts at the current offset.
     * @param string $content token content.
     * @param int $length length of the matched string in the SQL code.
     * @return SqlToken operator type token.
     */
    private function createOperatorToken($content, $length)
    {
        return new SqlToken([
            'type' => SqlToken::TYPE_OPERATOR,
            'content' => $content,
            'startOffset' => $this->offset,
            'endOffset' => $this->offset + $length,
        ]);
    }
351
352
    /**
     * Turns the buffered text into a keyword or a generic token, appends it to the
     * current token's children and clears the buffer. Does nothing if the buffer is empty.
     */
    private function addTokenFromBuffer()
    {
        $text = $this->_buffer;
        if ($text === '') {
            return;
        }

        $type = $this->isKeyword($text, $content) ? SqlToken::TYPE_KEYWORD : SqlToken::TYPE_TOKEN;

        $this->_currentToken[] = new SqlToken([
            'type' => $type,
            // isKeyword() may supply a replacement content; otherwise the buffered text is used.
            'content' => is_string($content) ? $content : $text,
            // The buffered text ends at the current offset.
            'startOffset' => $this->offset - mb_strlen($text, 'UTF-8'),
            'endOffset' => $this->offset,
        ]);

        $this->_buffer = '';
    }
370
371
    /**
     * Moves the current offset forward by the specified length and resets the substring cache.
     * @param int $length number of characters to move forward, must be positive.
     * @throws InvalidArgumentException if the length is not greater than 0.
     */
    private function advance($length)
    {
        if (0 >= $length) {
            throw new InvalidArgumentException('Length must be greater than 0.');
        }

        $this->offset += $length;
        // Cached substrings are discarded on every move (presumably to keep the cache small).
        $this->_substrings = [];
    }
385
386
    /**
     * Tells whether the current offset has reached the end of the SQL code.
     * @return bool `true` if there is nothing left to traverse.
     */
    private function isEof()
    {
        return $this->length <= $this->offset;
    }
394
}
395