Completed
Push — master ( e0dde8...a7d2aa )
by Carsten
09:47
created

SqlTokenizer   B

Complexity

Total Complexity 48

Size/Duplication

Total Lines 359
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 3

Test Coverage

Coverage 97.24%

Importance

Changes 0
Metric Value
wmc 48
lcom 1
cbo 3
dl 0
loc 359
ccs 141
cts 145
cp 0.9724
rs 8.4864
c 0
b 0
f 0

16 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 5 1
C tokenize() 0 36 8
isWhitespace() 0 1 ?
isComment() 0 1 ?
isOperator() 0 1 ?
isIdentifier() 0 1 ?
isStringLiteral() 0 1 ?
isKeyword() 0 1 ?
C startsWithAnyLongest() 0 25 7
B substring() 0 18 6
A indexAfter() 0 17 4
B tokenizeDelimitedString() 0 17 6
B tokenizeOperator() 0 57 9
A addTokenFromBuffer() 0 15 4
A advance() 0 9 2
A isEof() 0 4 1

How to fix   Complexity   

Complex Class

Complex classes like SqlTokenizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use SqlTokenizer, and based on these observations, apply Extract Interface, too.

1
<?php
2
/**
3
 * @link http://www.yiiframework.com/
4
 * @copyright Copyright (c) 2008 Yii Software LLC
5
 * @license http://www.yiiframework.com/license/
6
 */
7
8
namespace yii\db;
9
10
use yii\base\Component;
11
use yii\base\InvalidParamException;
12
13
/**
14
 * SqlTokenizer splits an SQL query into individual SQL tokens.
15
 *
16
 * It can be used to obtain additional information from SQL code.
17
 *
18
 * Usage example:
19
 *
20
 * ```php
21
 * $tokenizer = new SqlTokenizer("SELECT * FROM user WHERE id = 1");
22
 * $root = $tokenizer->tokenize();
23
 * $sqlTokens = $root->getChildren();
24
 * ```
25
 *
26
 * Tokens are instances of [[SqlToken]].
27
 *
28
 * @author Sergey Makinen <[email protected]>
29
 * @since 2.0.13
30
 */
31
abstract class SqlTokenizer extends Component
32
{
33
    /**
34
     * @var string SQL code.
35
     */
36
    public $sql;
37
38
    /**
39
     * @var int SQL code string length.
40
     */
41
    protected $length;
42
    /**
43
     * @var int SQL code string current offset.
44
     */
45
    protected $offset;
46
47
    /**
48
     * @var \SplStack stack of active tokens.
49
     */
50
    private $_tokenStack;
51
    /**
52
     * @var SqlToken active token. It's usually a top of the token stack.
53
     */
54
    private $_currentToken;
55
    /**
56
     * @var string[] cached substrings.
57
     */
58
    private $_substrings;
59
    /**
60
     * @var string current buffer value.
61
     */
62
    private $_buffer = '';
63
    /**
64
     * @var SqlToken resulting token of a last [[tokenize()]] call.
65
     */
66
    private $_token;
67
68
69
    /**
70
     * Constructor.
71
     * @param string $sql SQL code to be tokenized.
72
     * @param array $config name-value pairs that will be used to initialize the object properties
73
     */
74 13
    public function __construct($sql, $config = [])
    {
        // The SQL text is assigned first; only afterwards is the configuration
        // array handed over to the parent constructor.
        $this->sql = $sql;

        parent::__construct($config);
    }
79
80
    /**
81
     * Tokenizes and returns a code type token.
82
     * @return SqlToken code type token.
83
     */
84 13
    public function tokenize()
    {
        // Reset the per-run scanning state.
        $this->offset = 0;
        $this->_buffer = '';
        $this->_substrings = [];
        $this->length = mb_strlen($this->sql, 'UTF-8');

        // The root CODE token carries the whole SQL text and sits at the bottom of the stack.
        $this->_token = new SqlToken([
            'type' => SqlToken::TYPE_CODE,
            'content' => $this->sql,
        ]);
        $this->_tokenStack = new \SplStack();
        $this->_tokenStack->push($this->_token);

        // The first STATEMENT token is added as a child of the root and becomes the active token.
        $this->_token[] = new SqlToken(['type' => SqlToken::TYPE_STATEMENT]);
        $this->_tokenStack->push($this->_token[0]);
        $this->_currentToken = $this->_tokenStack->top();

        while (!$this->isEof()) {
            if ($this->isWhitespace($length) || $this->isComment($length)) {
                // Whitespace and comments end whatever was accumulated in the buffer and are skipped.
                $this->addTokenFromBuffer();
                $this->advance($length);
            } elseif ($this->tokenizeOperator($length) || $this->tokenizeDelimitedString($length)) {
                // The token has already been appended by the matching tokenize*() call.
                $this->advance($length);
            } else {
                // Anything else is collected one character at a time until a delimiter shows up.
                $this->_buffer .= $this->substring(1);
                $this->advance(1);
            }
        }
        $this->addTokenFromBuffer();

        // Remove the last child of the root if it has no children of its own.
        $root = $this->_token;
        if ($root->getHasChildren() && !$root[-1]->getHasChildren()) {
            unset($root[-1]);
        }

        return $root;
    }
120
121
    /**
122
     * Returns whether there's a whitespace at the current offset.
123
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
124
     * @param int $length length of the matched string.
125
     * @return bool whether there's a whitespace at the current offset.
126
     */
127
    abstract protected function isWhitespace(&$length);
128
129
    /**
130
     * Returns whether there's a commentary at the current offset.
131
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
132
     * @param int $length length of the matched string.
133
     * @return bool whether there's a commentary at the current offset.
134
     */
135
    abstract protected function isComment(&$length);
136
137
    /**
138
     * Returns whether there's an operator at the current offset.
139
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
140
     * It may also set `$content` to a string that will be used as a token content.
141
     * @param int $length length of the matched string.
142
     * @param string $content optional content instead of the matched string.
143
     * @return bool whether there's an operator at the current offset.
144
     */
145
    abstract protected function isOperator(&$length, &$content);
146
147
    /**
148
     * Returns whether there's an identifier at the current offset.
149
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
150
     * It may also set `$content` to a string that will be used as a token content.
151
     * @param int $length length of the matched string.
152
     * @param string $content optional content instead of the matched string.
153
     * @return bool whether there's an identifier at the current offset.
154
     */
155
    abstract protected function isIdentifier(&$length, &$content);
156
157
    /**
158
     * Returns whether there's a string literal at the current offset.
159
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
160
     * It may also set `$content` to a string that will be used as a token content.
161
     * @param int $length length of the matched string.
162
     * @param string $content optional content instead of the matched string.
163
     * @return bool whether there's a string literal at the current offset.
164
     */
165
    abstract protected function isStringLiteral(&$length, &$content);
166
167
    /**
168
     * Returns whether the given string is a keyword.
169
     * The method may set `$content` to a string that will be used as a token content.
170
     * @param string $string string to be matched.
171
     * @param string $content optional content instead of the matched string.
172
     * @return bool whether the given string is a keyword.
173
     */
174
    abstract protected function isKeyword($string, &$content);
175
176
    /**
177
     * Returns whether the longest common prefix equals to the SQL code of the same length at the current offset.
178
     * @param string[] $with strings to be tested.
179
     * The method **will** modify this parameter to speed up lookups.
180
     * @param bool $caseSensitive whether to perform a case sensitive comparison.
181
     * @param int|null $length length of the matched string.
182
     * @param string|null $content matched string.
183
     * @return bool whether a match is found.
184
     */
185 13
    protected function startsWithAnyLongest(array &$with, $caseSensitive, &$length = null, &$content = null)
    {
        if (empty($with)) {
            return false;
        }

        // A flat list of strings is converted once into a map of
        // [length => [string => true]] and written back through the reference,
        // so later calls with the same array skip this step.
        if (!is_array(reset($with))) {
            $byLength = [];
            foreach ($with as $candidate) {
                $key = $caseSensitive ? $candidate : mb_strtoupper($candidate, 'UTF-8');
                $byLength[mb_strlen($candidate, 'UTF-8')][$key] = true;
            }
            // Longest candidates first, so the longest match wins.
            krsort($byLength, SORT_NUMERIC);
            $with = $byLength;
        }

        foreach ($with as $candidateLength => $candidates) {
            // Note: $content is overwritten on every probe, including unsuccessful ones.
            $content = $this->substring($candidateLength, $caseSensitive);
            if (!isset($candidates[$content])) {
                continue;
            }

            $length = $candidateLength;
            return true;
        }

        return false;
    }
210
211
    /**
212
     * Returns a string of the given length starting with the specified offset.
213
     * @param int $length string length to be returned.
214
     * @param bool $caseSensitive if it's `false`, the string will be uppercased.
215
     * @param int|null $offset SQL code offset, defaults to current if `null` is passed.
216
     * @return string result string, it may be empty if there's nothing to return.
217
     */
218 13
    protected function substring($length, $caseSensitive = true, $offset = null)
    {
        if ($offset === null) {
            $offset = $this->offset;
        }
        // A request that reaches past the end of the SQL code yields an empty
        // string instead of a shortened one.
        if ($offset + $length > $this->length) {
            return '';
        }

        // Cache keys have the form "<offset>,<length>,<flag>", where flag "1"
        // holds the text as written and "0" holds its uppercased form.
        $exactKey = $offset . ',' . $length . ',1';
        if (!isset($this->_substrings[$exactKey])) {
            $this->_substrings[$exactKey] = mb_substr($this->sql, $offset, $length, 'UTF-8');
        }

        // Branch on truthiness instead of building the key from `(int) $caseSensitive`:
        // a truthy non-boolean flag (e.g. 2) would otherwise address a cache entry
        // that was never written.
        if ($caseSensitive) {
            return $this->_substrings[$exactKey];
        }

        $upperKey = $offset . ',' . $length . ',0';
        if (!isset($this->_substrings[$upperKey])) {
            $this->_substrings[$upperKey] = mb_strtoupper($this->_substrings[$exactKey], 'UTF-8');
        }

        return $this->_substrings[$upperKey];
    }
236
237
    /**
238
     * Returns an index after the given string in the SQL code starting with the specified offset.
239
     * @param string $string string to be found.
240
     * @param int|null $offset SQL code offset, defaults to current if `null` is passed.
241
     * @return int index after the given string or end of string index.
242
     */
243 13
    protected function indexAfter($string, $offset = null)
    {
        $start = $offset === null ? $this->offset : $offset;
        $needleLength = mb_strlen($string, 'UTF-8');

        // The needle cannot fit into the remaining SQL code: report the end of the string.
        if ($start + $needleLength > $this->length) {
            return $this->length;
        }

        $position = mb_strpos($this->sql, $string, $start, 'UTF-8');

        // Not found -> end of string; found -> index right after the match.
        return $position === false ? $this->length : $position + $needleLength;
    }
260
261
    /**
262
     * Determines whether there is a delimited string at the current offset and adds it to the token children.
263
     * @param int $length
264
     * @return bool
265
     */
266 13
    private function tokenizeDelimitedString(&$length)
    {
        // Identifiers are probed first; string literals are only probed when that fails.
        if ($this->isIdentifier($length, $content)) {
            $type = SqlToken::TYPE_IDENTIFIER;
        } elseif ($this->isStringLiteral($length, $content)) {
            $type = SqlToken::TYPE_STRING_LITERAL;
        } else {
            return false;
        }

        // Flush pending buffered text first so tokens keep their source order.
        $this->addTokenFromBuffer();

        $start = $this->offset;
        $this->_currentToken[] = new SqlToken([
            'type' => $type,
            // Use the matcher-supplied content when it is a string, the raw matched text otherwise.
            'content' => is_string($content) ? $content : $this->substring($length),
            'startOffset' => $start,
            'endOffset' => $start + $length,
        ]);

        return true;
    }
283
284
    /**
285
     * Determines whether there is an operator at the current offset and adds it to the token children.
286
     * @param int $length
287
     * @return bool
288
     */
289 13
    private function tokenizeOperator(&$length)
    {
        if (!$this->isOperator($length, $content)) {
            return false;
        }

        // Flush pending buffered text first so tokens keep their source order.
        $this->addTokenFromBuffer();

        $operator = $this->substring($length);
        // Use the matcher-supplied content when it is a string, the raw matched text otherwise.
        $tokenContent = is_string($content) ? $content : $operator;

        switch ($operator) {
            case '(':
                // The "(" itself stays in the current token; what follows goes into a new parenthesis token.
                $this->addOperatorToken($tokenContent, $length);
                $this->openChildToken(SqlToken::TYPE_PARENTHESIS);
                break;
            case ')':
                // Leave the active token first, so that ")" becomes a sibling of the matching "(".
                $this->closeCurrentToken();
                $this->addOperatorToken(')', $length);
                break;
            case ';':
                // A ";" in a token without children is skipped: no token is added.
                if (!$this->_currentToken->getHasChildren()) {
                    break;
                }

                $this->addOperatorToken($tokenContent, $length);
                $this->closeCurrentToken();
                $this->openChildToken(SqlToken::TYPE_STATEMENT);
                break;
            default:
                $this->addOperatorToken($tokenContent, $length);
                break;
        }

        return true;
    }

    /**
     * Appends an operator token spanning `$length` characters from the current offset to the active token.
     * @param string $content token content.
     * @param int $length length of the matched operator.
     */
    private function addOperatorToken($content, $length)
    {
        $this->_currentToken[] = new SqlToken([
            'type' => SqlToken::TYPE_OPERATOR,
            'content' => $content,
            'startOffset' => $this->offset,
            'endOffset' => $this->offset + $length,
        ]);
    }

    /**
     * Appends a new token of the given type to the active token, pushes it onto the stack and makes it active.
     * @param mixed $type one of the `SqlToken::TYPE_*` values.
     */
    private function openChildToken($type)
    {
        $this->_currentToken[] = new SqlToken(['type' => $type]);
        $this->_tokenStack->push($this->_currentToken[-1]);
        $this->_currentToken = $this->_tokenStack->top();
    }

    /**
     * Leaves the active token and makes the one below it on the stack active.
     * The bottom element of the stack is never removed: unbalanced `)` (or a `;` following one)
     * would otherwise empty the stack and make `SplStack::top()` throw a `\RuntimeException`.
     */
    private function closeCurrentToken()
    {
        if ($this->_tokenStack->count() > 1) {
            $this->_tokenStack->pop();
        }
        $this->_currentToken = $this->_tokenStack->top();
    }
346
347
    /**
348
     * Determines a type of text in the buffer, tokenizes it and adds it to the token children.
349
     */
350 13
    private function addTokenFromBuffer()
    {
        $text = $this->_buffer;
        if ($text === '') {
            // Nothing has been collected since the last flush.
            return;
        }

        $type = $this->isKeyword($text, $content) ? SqlToken::TYPE_KEYWORD : SqlToken::TYPE_TOKEN;

        // The buffered text ends at the current offset, so its start is derived from its length.
        $end = $this->offset;
        $this->_currentToken[] = new SqlToken([
            'type' => $type,
            // Use the content supplied by isKeyword() when it is a string, the buffered text otherwise.
            'content' => is_string($content) ? $content : $text,
            'startOffset' => $end - mb_strlen($text, 'UTF-8'),
            'endOffset' => $end,
        ]);

        $this->_buffer = '';
    }
365
366
    /**
367
     * Adds the specified length to the current offset.
368
     * @param int $length
369
     * @throws InvalidParamException
370
     */
371 13
    private function advance($length)
    {
        // A non-positive step is rejected with an exception.
        if ($length <= 0) {
            throw new InvalidParamException('Length must be greater than 0.');
        }

        // The substring cache is emptied on every move.
        $this->_substrings = [];
        $this->offset = $this->offset + $length;
    }
380
381
    /**
382
     * Returns whether the SQL code is completely traversed.
383
     * @return bool
384
     */
385 13
    private function isEof()
    {
        // The end is reached once the offset is at or beyond the character length of the SQL code.
        return $this->length <= $this->offset;
    }
389
}
390