Passed
Pull Request — master (#363)
by
unknown
15:49 queued 05:45
created

WithStatement::getSubTokenList()   B

Complexity

Conditions 7
Paths 16

Size

Total Lines 37
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 56

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 7
eloc 20
nc 16
nop 1
dl 0
loc 37
ccs 0
cts 0
cp 0
crap 56
rs 8.6666
c 1
b 0
f 0
1
<?php
2
/**
3
 * `WITH` statement.
4
 */
5
6
declare(strict_types=1);
7
8
namespace PhpMyAdmin\SqlParser\Statements;
9
10
use PhpMyAdmin\SqlParser\Components\Array2d;
11
use PhpMyAdmin\SqlParser\Components\OptionsArray;
12
use PhpMyAdmin\SqlParser\Components\WithKeyword;
13
use PhpMyAdmin\SqlParser\Exceptions\ParserException;
14
use PhpMyAdmin\SqlParser\Parser;
15
use PhpMyAdmin\SqlParser\Statement;
16
use PhpMyAdmin\SqlParser\Token;
17
use PhpMyAdmin\SqlParser\TokensList;
18
use PhpMyAdmin\SqlParser\Translator;
19
20
use function array_slice;
21
use function count;
22
23
/**
24
 * `WITH` statement.
25
26
 *  WITH [RECURSIVE] query_name [ (column_name [,...]) ] AS (SELECT ...) [, ...]
27
 */
28
final class WithStatement extends Statement
29
{
30
    /**
31
     * Options for `WITH` statements and their slot ID.
32
     *
33
     * @var mixed[]
34
     */
35
    public static $OPTIONS = ['RECURSIVE' => 1];
36
37
    /**
38
     * The clauses of this statement, in order.
39
     *
40
     * @see Statement::$CLAUSES
41
     *
42
     * @var mixed[]
43
     */
44
    public static $CLAUSES = [
45
        'WITH' => [
46
            'WITH',
47
            2,
48
        ],
49
        // Used for options.
50
        '_OPTIONS' => [
51
            '_OPTIONS',
52
            1,
53
        ],
54
        'AS' => [
55
            'AS',
56
            2,
57
        ],
58
    ];
59
60
    /** @var WithKeyword[] */
61
    public $withers = [];
62
63
    /**
     * Parses a `WITH` statement out of the token list.
     *
     * Stores the options found right after `WITH` in `$this->options` and adds one
     * `WithKeyword` per CTE to `$this->withers`, keyed by the CTE name. Every problem is
     * reported through `$parser` (via `error()` or by appending to `$parser->errors`)
     * and stops the parsing.
     *
     * @param Parser     $parser the instance that requests parsing
     * @param TokensList $list   the list of tokens to be parsed
     */
    public function parse(Parser $parser, TokensList $list)
    {
        /**
         * The state of the parser.
         *
         * Below are the states of the parser.
         *
         *      0 ---------------- [ name ] -----------------> 1
         *
         *      1 ------------------ [ ( ] ------------------> 2
         *
         *      1 ------------------ [ AS ] -----------------> 3
         *
         *      2 ------------------ [ AS ] -----------------> 3
         *
         *      3 ------------------ [ ( ] ------------------> 4
         *
         *      4 ------------------ [ , ] ------------------> 0
         *
         *      4 ----- [ SELECT/UPDATE/DELETE/INSERT ] -----> 5
         *
         * @var int
         */
        $state = 0;
        // Name of the CTE currently being parsed; used as the key in $this->withers.
        $wither = null;

        ++$list->idx; // Skipping `WITH`.

        // parse any options if provided
        $this->options = OptionsArray::parse($parser, $list, static::$OPTIONS);
        ++$list->idx;

        for (; $list->idx < $list->count; ++$list->idx) {
            /**
             * Token parsed at this moment.
             *
             * @var Token
             */
            $token = $list->tokens[$list->idx];

            // Skipping whitespaces and comments.
            if ($token->type === Token::TYPE_WHITESPACE || $token->type === Token::TYPE_COMMENT) {
                continue;
            }

            if ($state === 0) {
                if ($token->type !== Token::TYPE_NONE) {
                    $parser->error('The name of the CTE was expected.', $token);
                    break;
                }

                $wither = $token->value;
                $this->withers[$wither] = new WithKeyword($wither);
                $state = 1;
            } elseif ($state === 1) {
                if ($token->type === Token::TYPE_OPERATOR && $token->value === '(') {
                    // Optional column list of the CTE.
                    $this->withers[$wither]->columns = Array2d::parse($parser, $list);
                    $state = 2;
                } elseif ($token->type === Token::TYPE_KEYWORD && $token->keyword === 'AS') {
                    // No column list was given.
                    $state = 3;
                } else {
                    $parser->error('Unexpected token.', $token);
                    break;
                }
            } elseif ($state === 2) {
                if (! ($token->type === Token::TYPE_KEYWORD && $token->keyword === 'AS')) {
                    $parser->error('AS keyword was expected.', $token);
                    break;
                }

                $state = 3;
            } elseif ($state === 3) {
                $idxBeforeGetNext = $list->idx;

                // We want the next non-comment and non-space token after $token.
                // The first getNext call starts at the current index, which is $token itself,
                // returns it and moves the index forward by one; the token there is not
                // guaranteed to be non-comment and non-space, hence the second getNext call.
                $list->getNext();
                $nextKeyword = $list->getNext();

                if (! ($token->value === '(' && ($nextKeyword && $nextKeyword->value === 'SELECT'))) {
                    $parser->error('Subquery of the CTE was expected.', $token);
                    $list->idx = $idxBeforeGetNext;
                    break;
                }

                // Restore the index
                $list->idx = $idxBeforeGetNext;

                // Step over the opening bracket so the sub list starts inside the subquery.
                ++$list->idx;
                $subList = $this->getSubTokenList($list);
                if ($subList instanceof ParserException) {
                    $parser->errors[] = $subList;
                    break;
                }

                $subParser = new Parser($subList);

                if (count($subParser->errors)) {
                    foreach ($subParser->errors as $error) {
                        $parser->errors[] = $error;
                    }

                    break;
                }

                $this->withers[$wither]->statement = $subParser;

                $state = 4;
            } elseif ($state === 4) {
                if ($token->value === ',') {
                    // There's another WITH expression to parse, go back to state=0
                    $state = 0;
                    continue;
                }

                if (
                    $token->type === Token::TYPE_KEYWORD && (
                    $token->value === 'SELECT'
                    || $token->value === 'INSERT'
                    || $token->value === 'UPDATE'
                    || $token->value === 'DELETE'
                    )
                ) {
                    $state = 5;
                    // Step back so the next iteration handles this same keyword in state 5.
                    --$list->idx;
                    continue;
                }

                $parser->error('An expression was expected.', $token);
                break;
            } elseif ($state === 5) {
                /**
                 * We need to parse all of the remaining tokens because, in most cases, they are just
                 * the CTE expression, which is a SELECT, INSERT, UPDATE or DELETE statement.
                 * e.g: INSERT .. ( SELECT 1 ) SELECT col1 FROM cte ON DUPLICATE KEY UPDATE col_name = 3.
                 * The issue is that `ON DUPLICATE KEY UPDATE col_name = 3` belongs to the main INSERT query,
                 * not to the CTE expression (SELECT col1 FROM cte), so we need to determine the end of
                 * the expression and let the InsertStatement parser handle `ON DUPLICATE KEY UPDATE` instead.
                 */

                // Index of the last parsed token is by default the last token in the $list, because we're
                // assuming that all remaining tokens at state 5 are related to the expression.
                $idxOfLastParsedToken = $list->count - 1;
                // Index before search to be able to restore the index.
                $idxBeforeSearch = $list->idx;
                // Length of expression tokens is null by default, in order for the $subList to start
                // from $list->idx to the end of the $list.
                $lengthOfExpressionTokens = null;

                if ($list->getNextOfTypeAndValue(Token::TYPE_KEYWORD, 'ON')) {
                    // (-1) because getNextOfTypeAndValue returned ON and increased the index.
                    $idxOfOn = $list->idx - 1;
                    // Index of the last parsed token will be the token before the ON Keyword, therefore $idxOfOn - 1.
                    $idxOfLastParsedToken = $idxOfOn - 1;
                    // The length of the expression tokens would be the difference
                    // between the first unrelated token `ON` and the idx
                    // before skipping the CTE tokens.
                    $lengthOfExpressionTokens = $idxOfOn - $idxBeforeSearch;
                }

                // Restore the index
                $list->idx = $idxBeforeSearch;

                // The sub parser is only consulted for its errors here.
                $subList = new TokensList(array_slice($list->tokens, $list->idx, $lengthOfExpressionTokens));
                $subParser = new Parser($subList);
                if (count($subParser->errors)) {
                    foreach ($subParser->errors as $error) {
                        $parser->errors[] = $error;
                    }
                }

                $list->idx = $idxOfLastParsedToken;
                break;
            }
        }

        // 5 is the only valid end state
        if ($state !== 5) {
            // When the loop consumed every token the index is one past the end of the list;
            // report the error against the last existing token instead of an undefined offset.
            $errorIdx = $list->idx < $list->count ? $list->idx : $list->count - 1;

            /**
             * Token the error is reported against.
             *
             * @var Token|null
             */
            $token = $list->tokens[$errorIdx] ?? null;

            $parser->error('Unexpected end of the WITH CTE.', $token);
        }

        --$list->idx;
    }
257
258
    /**
     * Builds the string form of this statement.
     *
     * The result is the `WITH ` prefix followed by the built form of every entry of
     * `$this->withers`, in order, separated by `, `. Nothing else is appended.
     *
     * @return string
     */
    public function build()
    {
        $prefix = 'WITH ';
        $built = $prefix;

        foreach ($this->withers as $cte) {
            // A separator is only needed once something follows the prefix.
            if ($built !== $prefix) {
                $built .= ', ';
            }

            $built .= WithKeyword::build($cte);
        }

        return $built;
    }
272
273
    /**
     * Extracts the tokens of a bracketed CTE body so that another parser can process them.
     *
     * Scanning starts at the current index of the list, which the caller places right after
     * the opening bracket of the body, and moves the index forward until the bracket that
     * closes the body is reached. Nested brackets are balanced along the way. Only operator
     * tokens are treated as brackets, so a non-operator token whose value happens to be
     * `(` or `)` does not disturb the balance.
     *
     * @param TokensList $list the list of tokens being parsed; its index is moved by this method
     *
     * @return ParserException|TokensList the tokens from the starting index up to, but not including,
     *                                    the closing bracket; or an exception (returned, not thrown)
     *                                    when the list ends before that bracket is found
     */
    private function getSubTokenList(TokensList $list)
    {
        $startIdx = $list->idx;
        /** @var Token|null $token */
        $token = null;
        // Number of brackets opened inside the body that are still waiting to be closed.
        $openParenthesis = 0;

        for (; $list->idx < $list->count; ++$list->idx) {
            $token = $list->tokens[$list->idx];

            if ($token->type !== Token::TYPE_OPERATOR) {
                continue;
            }

            if ($token->value === '(') {
                ++$openParenthesis;
            } elseif ($token->value === ')' && --$openParenthesis === -1) {
                // This bracket closes the one that was opened before the starting index.
                break;
            }
        }

        // The error is returned rather than thrown so that the caller does not need
        // a try/catch inside its parsing loop.
        if ($list->idx >= $list->count) {
            // Leave the index on the last token of the list.
            --$list->idx;

            return new ParserException(
                Translator::gettext('A closing bracket was expected.'),
                $token
            );
        }

        $length = $list->idx - $startIdx;

        return new TokensList(array_slice($list->tokens, $startIdx, $length), $length);
    }
316
}
317