Passed
Push — master ( 9dbdd9...d5a428 )
by Alexander
04:15
created

framework/db/SqlTokenizer.php (1 issue)

1
<?php
2
/**
3
 * @link http://www.yiiframework.com/
4
 * @copyright Copyright (c) 2008 Yii Software LLC
5
 * @license http://www.yiiframework.com/license/
6
 */
7
8
namespace yii\db;
9
10
use yii\base\Component;
11
use yii\base\InvalidArgumentException;
12
13
/**
14
 * SqlTokenizer splits an SQL query into individual SQL tokens.
15
 *
16
 * It can be used to obtain additional information from SQL code.
17
 *
18
 * Usage example:
19
 *
20
 * ```php
21
 * $tokenizer = new SqlTokenizer("SELECT * FROM user WHERE id = 1");
22
 * $root = $tokenizer->tokenize();
23
 * $sqlTokens = $root->getChildren();
24
 * ```
25
 *
26
 * Tokens are instances of [[SqlToken]].
27
 *
28
 * @author Sergey Makinen <[email protected]>
29
 * @since 2.0.13
30
 */
31
abstract class SqlTokenizer extends Component
32
{
33
    /**
34
     * @var string SQL code.
35
     */
36
    public $sql;
37
38
    /**
39
     * @var int SQL code string length.
40
     */
41
    protected $length;
42
    /**
43
     * @var int SQL code string current offset.
44
     */
45
    protected $offset;
46
47
    /**
48
     * @var \SplStack stack of active tokens.
49
     */
50
    private $_tokenStack;
51
    /**
52
     * @var SqlToken active token. It's usually a top of the token stack.
53
     */
54
    private $_currentToken;
55
    /**
56
     * @var string[] cached substrings.
57
     */
58
    private $_substrings;
59
    /**
60
     * @var string current buffer value.
61
     */
62
    private $_buffer = '';
63
    /**
64
     * @var SqlToken resulting token of a last [[tokenize()]] call.
65
     */
66
    private $_token;
67
68
69
    /**
70
     * Constructor.
71
     * @param string $sql SQL code to be tokenized.
72
     * @param array $config name-value pairs that will be used to initialize the object properties
73
     */
74 25
    public function __construct($sql, $config = [])
    {
        // Assign the SQL before delegating to the parent constructor, which
        // receives the name-value pairs used to initialize the object properties.
        $this->sql = $sql;

        parent::__construct($config);
    }
79
80
    /**
81
     * Tokenizes and returns a code type token.
82
     * @return SqlToken code type token.
83
     */
84 25
    public function tokenize()
    {
        // Reset the per-run scanning state.
        $this->_buffer = '';
        $this->_substrings = [];
        $this->offset = 0;
        $this->length = mb_strlen($this->sql, 'UTF-8');

        // The root "code" token holds the whole SQL; its first child is the
        // first statement token, which becomes the active token.
        $root = new SqlToken([
            'type' => SqlToken::TYPE_CODE,
            'content' => $this->sql,
        ]);
        $this->_token = $root;
        $this->_tokenStack = new \SplStack();
        $this->_tokenStack->push($root);
        $root[] = new SqlToken(['type' => SqlToken::TYPE_STATEMENT]);
        $this->_tokenStack->push($root[0]);
        $this->_currentToken = $this->_tokenStack->top();

        while (!$this->isEof()) {
            if ($this->isWhitespace($length) || $this->isComment($length)) {
                // Whitespace and comments terminate the buffered text and are skipped.
                $this->addTokenFromBuffer();
                $this->advance($length);
            } elseif ($this->tokenizeOperator($length) || $this->tokenizeDelimitedString($length)) {
                // The matched token has already been added; just move past it.
                $this->advance($length);
            } else {
                // Nothing recognized: collect the character into the buffer.
                $this->_buffer .= $this->substring(1);
                $this->advance(1);
            }
        }

        // Flush whatever text is still buffered at the end of the SQL.
        $this->addTokenFromBuffer();

        // Drop the last child of the root token when it has no children itself.
        if ($root->getHasChildren() && !$root[-1]->getHasChildren()) {
            unset($root[-1]);
        }

        return $root;
    }
121
122
    /**
123
     * Returns whether there's a whitespace at the current offset.
124
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
125
     * @param int $length length of the matched string.
126
     * @return bool whether there's a whitespace at the current offset.
127
     */
128
    abstract protected function isWhitespace(&$length);
129
130
    /**
131
     * Returns whether there's a commentary at the current offset.
132
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
133
     * @param int $length length of the matched string.
134
     * @return bool whether there's a commentary at the current offset.
135
     */
136
    abstract protected function isComment(&$length);
137
138
    /**
139
     * Returns whether there's an operator at the current offset.
140
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
141
     * It may also set `$content` to a string that will be used as a token content.
142
     * @param int $length length of the matched string.
143
     * @param string $content optional content instead of the matched string.
144
     * @return bool whether there's an operator at the current offset.
145
     */
146
    abstract protected function isOperator(&$length, &$content);
147
148
    /**
149
     * Returns whether there's an identifier at the current offset.
150
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
151
     * It may also set `$content` to a string that will be used as a token content.
152
     * @param int $length length of the matched string.
153
     * @param string $content optional content instead of the matched string.
154
     * @return bool whether there's an identifier at the current offset.
155
     */
156
    abstract protected function isIdentifier(&$length, &$content);
157
158
    /**
159
     * Returns whether there's a string literal at the current offset.
160
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
161
     * It may also set `$content` to a string that will be used as a token content.
162
     * @param int $length length of the matched string.
163
     * @param string $content optional content instead of the matched string.
164
     * @return bool whether there's a string literal at the current offset.
165
     */
166
    abstract protected function isStringLiteral(&$length, &$content);
167
168
    /**
169
     * Returns whether the given string is a keyword.
170
     * The method may set `$content` to a string that will be used as a token content.
171
     * @param string $string string to be matched.
172
     * @param string $content optional content instead of the matched string.
173
     * @return bool whether the given string is a keyword.
174
     */
175
    abstract protected function isKeyword($string, &$content);
176
177
    /**
178
     * Returns whether the SQL code at the current offset starts with any of the given strings, trying the longest strings first.
179
     * @param string[] $with strings to be tested.
180
     * The method **will** modify this parameter to speed up lookups.
181
     * @param bool $caseSensitive whether to perform a case sensitive comparison.
182
     * @param int|null $length length of the matched string.
183
     * @param string|null $content matched string.
184
     * @return bool whether a match is found.
185
     */
186 25
    protected function startsWithAnyLongest(array &$with, $caseSensitive, &$length = null, &$content = null)
    {
        if (empty($with)) {
            return false;
        }

        // A flat list of strings has not been indexed yet: replace it (for the
        // caller too, since $with is a reference) with a map of
        // length => [string => true], ordered from the longest length down.
        $alreadyIndexed = is_array(reset($with));
        if (!$alreadyIndexed) {
            usort($with, function ($left, $right) {
                return mb_strlen($right, 'UTF-8') - mb_strlen($left, 'UTF-8');
            });
            $byLength = [];
            foreach ($with as $candidate) {
                $key = $caseSensitive ? $candidate : mb_strtoupper($candidate, 'UTF-8');
                $byLength[mb_strlen($candidate, 'UTF-8')][$key] = true;
            }
            $with = $byLength;
        }

        // Compare the SQL at the current offset against each length group.
        foreach ($with as $candidateLength => $candidates) {
            $content = $this->substring($candidateLength, $caseSensitive);
            if (!isset($candidates[$content])) {
                continue;
            }

            $length = $candidateLength;
            return true;
        }

        return false;
    }
212
213
    /**
214
     * Returns a string of the given length starting with the specified offset.
215
     * @param int $length string length to be returned.
216
     * @param bool $caseSensitive if it's `false`, the string will be uppercased.
217
     * @param int|null $offset SQL code offset, defaults to current if `null` is passed.
218
     * @return string result string, it may be empty if there's nothing to return.
219
     */
220 25
    protected function substring($length, $caseSensitive = true, $offset = null)
    {
        $start = $offset !== null ? $offset : $this->offset;

        // Requests reaching past the end of the SQL code yield an empty string.
        if ($this->length < $start + $length) {
            return '';
        }

        // Cache entries are keyed by "offset,length,flag": flag 1 holds the
        // exact substring, flag 0 its uppercased variant.
        $keyPrefix = $start . ',' . $length . ',';
        if (!isset($this->_substrings[$keyPrefix . '1'])) {
            $this->_substrings[$keyPrefix . '1'] = mb_substr($this->sql, $start, $length, 'UTF-8');
        }
        if (!$caseSensitive && !isset($this->_substrings[$keyPrefix . '0'])) {
            $this->_substrings[$keyPrefix . '0'] = mb_strtoupper($this->_substrings[$keyPrefix . '1'], 'UTF-8');
        }

        return $this->_substrings[$keyPrefix . (int) $caseSensitive];
    }
239
240
    /**
241
     * Returns an index after the given string in the SQL code starting with the specified offset.
242
     * @param string $string string to be found.
243
     * @param int|null $offset SQL code offset, defaults to current if `null` is passed.
244
     * @return int index after the given string or end of string index.
245
     */
246 25
    protected function indexAfter($string, $offset = null)
    {
        $start = $offset !== null ? $offset : $this->offset;
        $needleLength = mb_strlen($string, 'UTF-8');

        // The string cannot fit into the remaining SQL code.
        if ($start + $needleLength > $this->length) {
            return $this->length;
        }

        $position = mb_strpos($this->sql, $string, $start, 'UTF-8');

        // When the string is not found, the end of the SQL code is reported.
        return $position === false ? $this->length : $position + $needleLength;
    }
264
265
    /**
266
     * Determines whether there is a delimited string at the current offset and adds it to the token children.
267
     * @param int $length
268
     * @return bool
269
     */
270 25
    private function tokenizeDelimitedString(&$length)
    {
        // An identifier takes precedence; the string literal check only runs
        // when no identifier was matched.
        if ($this->isIdentifier($length, $content)) {
            $type = SqlToken::TYPE_IDENTIFIER;
        } elseif ($this->isStringLiteral($length, $content)) {
            $type = SqlToken::TYPE_STRING_LITERAL;
        } else {
            return false;
        }

        // Text buffered before the delimited string becomes its own token.
        $this->addTokenFromBuffer();

        $this->_currentToken[] = new SqlToken([
            'type' => $type,
            // Use the content supplied by the matcher, or the matched SQL itself.
            'content' => is_string($content) ? $content : $this->substring($length),
            'startOffset' => $this->offset,
            'endOffset' => $this->offset + $length,
        ]);

        return true;
    }
287
288
    /**
289
     * Determines whether there is an operator at the current offset and adds it to the token children.
290
     * @param int $length
291
     * @return bool
292
     */
293 25
    private function tokenizeOperator(&$length)
    {
        if (!$this->isOperator($length, $content)) {
            return false;
        }

        // Text buffered before the operator becomes its own token.
        $this->addTokenFromBuffer();

        $matched = $this->substring($length);
        $operatorConfig = [
            'type' => SqlToken::TYPE_OPERATOR,
            // Use the content supplied by the matcher, or the matched SQL itself.
            'content' => is_string($content) ? $content : $matched,
            'startOffset' => $this->offset,
            'endOffset' => $this->offset + $length,
        ];

        switch ($matched) {
            case '(':
                // The parenthesis is added to the active token, then a new
                // parenthesis token is opened to collect what follows.
                $this->_currentToken[] = new SqlToken($operatorConfig);
                $this->openChildToken(SqlToken::TYPE_PARENTHESIS);
                break;
            case ')':
                // Leave the active token first, so the closing parenthesis is
                // added to the token below it on the stack.
                $this->_tokenStack->pop();
                $this->_currentToken = $this->_tokenStack->top();
                $operatorConfig['content'] = ')';
                $this->_currentToken[] = new SqlToken($operatorConfig);
                break;
            case ';':
                // A semicolon is dropped while the active token has no children.
                if ($this->_currentToken->getHasChildren()) {
                    // Close the active token with the semicolon and open a new statement.
                    $this->_currentToken[] = new SqlToken($operatorConfig);
                    $this->_tokenStack->pop();
                    $this->_currentToken = $this->_tokenStack->top();
                    $this->openChildToken(SqlToken::TYPE_STATEMENT);
                }
                break;
            default:
                $this->_currentToken[] = new SqlToken($operatorConfig);
                break;
        }

        return true;
    }

    /**
     * Appends a new token of the given type to the active token, pushes it
     * onto the token stack and makes it the active token.
     * @param mixed $type one of the `SqlToken::TYPE_*` values.
     */
    private function openChildToken($type)
    {
        $this->_currentToken[] = new SqlToken(['type' => $type]);
        $this->_tokenStack->push($this->_currentToken[-1]);
        $this->_currentToken = $this->_tokenStack->top();
    }
351
352
    /**
353
     * Determines a type of text in the buffer, tokenizes it and adds it to the token children.
354
     */
355 25
    private function addTokenFromBuffer()
    {
        $text = $this->_buffer;
        if ($text === '') {
            // Nothing has been collected since the last token.
            return;
        }

        $type = $this->isKeyword($text, $content) ? SqlToken::TYPE_KEYWORD : SqlToken::TYPE_TOKEN;

        // The buffered text ends at the current offset, so its start is the
        // offset minus the buffer length.
        $this->_currentToken[] = new SqlToken([
            'type' => $type,
            'content' => is_string($content) ? $content : $text,
            'startOffset' => $this->offset - mb_strlen($text, 'UTF-8'),
            'endOffset' => $this->offset,
        ]);

        $this->_buffer = '';
    }
370
371
    /**
372
     * Adds the specified length to the current offset.
373
     * @param int $length
374
     * @throws InvalidArgumentException
375
     */
376 25
    private function advance($length)
    {
        // Refuse to stand still or move backwards.
        if (0 >= $length) {
            throw new InvalidArgumentException('Length must be greater than 0.');
        }

        // Cached substrings are discarded every time the offset moves.
        $this->_substrings = [];
        $this->offset += $length;
    }
385
386
    /**
387
     * Returns whether the SQL code is completely traversed.
388
     * @return bool
389
     */
390 25
    private function isEof()
    {
        // The end is reached once the offset is at or past the SQL length.
        return $this->length <= $this->offset;
    }
394
}
395