WithStatement::build()   A
last analyzed

Complexity

Conditions 5
Paths 6

Size

Total Lines 18
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 5

Importance

Changes 0
Metric Value
cc 5
eloc 9
nc 6
nop 0
dl 0
loc 18
ccs 10
cts 10
cp 1
crap 5
rs 9.6111
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PhpMyAdmin\SqlParser\Statements;
6
7
use PhpMyAdmin\SqlParser\Components\WithKeyword;
8
use PhpMyAdmin\SqlParser\Exceptions\ParserException;
9
use PhpMyAdmin\SqlParser\Parser;
10
use PhpMyAdmin\SqlParser\Parsers\Array2d;
11
use PhpMyAdmin\SqlParser\Parsers\OptionsArrays;
12
use PhpMyAdmin\SqlParser\Statement;
13
use PhpMyAdmin\SqlParser\TokensList;
14
use PhpMyAdmin\SqlParser\TokenType;
15
use PhpMyAdmin\SqlParser\Translator;
16
17
use function array_slice;
18
use function preg_match;
19
20
/**
21
 * `WITH` statement.
22
23
 *  WITH [RECURSIVE] query_name [ (column_name [,...]) ] AS (SELECT ...) [, ...]
24
 */
25
final class WithStatement extends Statement
26
{
27
    /**
28
     * Options for `WITH` statements and their slot ID.
29
     *
30
     * @var array<string, int|array<int, int|string>>
31
     * @psalm-var array<string, (positive-int|array{positive-int, ('var'|'var='|'expr'|'expr=')})>
32
     */
33
    public static array $statementOptions = ['RECURSIVE' => 1];
34
35
    /**
36
     * The clauses of this statement, in order.
37
     *
38
     * @see Statement::$clauses
39
     *
40
     * @var array<string, array{non-empty-string, int-mask-of<self::ADD_*>}>
0 ignored issues
show
Documentation Bug introduced by
The doc comment array<string, array{non-...-mask-of<self::ADD_*>}> at position 6 could not be parsed: Expected ':' at position 6, but found 'non-empty-string'.
Loading history...
41
     */
42
    public static array $clauses = [
43
        'WITH' => [
44
            'WITH',
45
            Statement::ADD_KEYWORD,
46
        ],
47
        // Used for options.
48
        '_OPTIONS' => [
49
            '_OPTIONS',
50
            Statement::ADD_CLAUSE,
51
        ],
52
        'AS' => [
53
            'AS',
54
            Statement::ADD_KEYWORD,
55
        ],
56
    ];
57
58
    /** @var WithKeyword[] */
59
    public array $withers = [];
60
61
    /**
62
     * holds the CTE parser.
63
     */
64
    public Parser|null $cteStatementParser = null;
65
66
    /**
67
     * @param Parser     $parser the instance that requests parsing
68
     * @param TokensList $list   the list of tokens to be parsed
69
     */
70 52
    public function parse(Parser $parser, TokensList $list): void
71
    {
72
        /**
73
         * The state of the parser.
74
         *
75
         * Below are the states of the parser.
76
         *
77
         *      0 ---------------- [ name ] -----------------> 1
78
         *
79
         *      1 ------------------ [ ( ] ------------------> 2
80
         *
81
         *      2 ------------------ [ AS ] -----------------> 3
82
         *
83
         *      3 ------------------ [ ( ] ------------------> 4
84
         *
85
         *      4 ------------------ [ , ] ------------------> 1
86
         *
87
         *      4 ----- [ SELECT/UPDATE/DELETE/INSERT ] -----> 5
88
         */
89 52
        $state = 0;
90 52
        $wither = null;
91
92 52
        ++$list->idx; // Skipping `WITH`.
93
94
        // parse any options if provided
95 52
        $this->options = OptionsArrays::parse($parser, $list, static::$statementOptions);
96 52
        ++$list->idx;
97
98 52
        for (; $list->idx < $list->count; ++$list->idx) {
99
            /**
100
             * Token parsed at this moment.
101
             */
102 52
            $token = $list->tokens[$list->idx];
103
104
            // Skipping whitespaces and comments.
105 52
            if ($token->type === TokenType::Whitespace || $token->type === TokenType::Comment) {
106 46
                continue;
107
            }
108
109 52
            if ($state === 0) {
110 52
                if ($token->type !== TokenType::None || ! preg_match('/^[a-zA-Z0-9_$]+$/', $token->token)) {
111 6
                    $parser->error('The name of the CTE was expected.', $token);
112 6
                    break;
113
                }
114
115 48
                $wither = $token->value;
116 48
                $this->withers[$wither] = new WithKeyword($wither);
117 48
                $state = 1;
118 48
            } elseif ($state === 1) {
119 48
                if ($token->type === TokenType::Operator && $token->value === '(') {
120 40
                    $columns = Array2d::parse($parser, $list);
121 40
                    if ($parser->errors !== []) {
122 2
                        break;
123
                    }
124
125 38
                    $this->withers[$wither]->columns = $columns;
126 38
                    $state = 2;
127 12
                } elseif ($token->type === TokenType::Keyword && $token->keyword === 'AS') {
128 10
                    $state = 3;
129
                } else {
130 2
                    $parser->error('Unexpected token.', $token);
131 2
                    break;
132
                }
133 44
            } elseif ($state === 2) {
134 38
                if (! ($token->type === TokenType::Keyword && $token->keyword === 'AS')) {
135 2
                    $parser->error('AS keyword was expected.', $token);
136 2
                    break;
137
                }
138
139 36
                $state = 3;
140 42
            } elseif ($state === 3) {
141 42
                $idxBeforeGetNext = $list->idx;
142
143 42
                $list->idx++; // Ignore the current token
144 42
                $nextKeyword = $list->getNext();
145
146 42
                if (! ($token->value === '(' && ($nextKeyword && $nextKeyword->value === 'SELECT'))) {
147 4
                    $parser->error('Subquery of the CTE was expected.', $token);
148 4
                    $list->idx = $idxBeforeGetNext;
149 4
                    break;
150
                }
151
152
                // Restore the index
153 38
                $list->idx = $idxBeforeGetNext;
154
155 38
                ++$list->idx;
156 38
                $subList = $this->getSubTokenList($list);
157 38
                if ($subList instanceof ParserException) {
158 2
                    $parser->errors[] = $subList;
159 2
                    break;
160
                }
161
162 36
                $subParser = new Parser($subList);
163
164 36
                if ($subParser->errors !== []) {
165 2
                    foreach ($subParser->errors as $error) {
166 2
                        $parser->errors[] = $error;
167
                    }
168
169 2
                    break;
170
                }
171
172 34
                $this->withers[$wither]->statement = $subParser;
173
174 34
                $state = 4;
175 34
            } elseif ($state === 4) {
176 34
                if ($token->value === ',') {
177
                    // There's another WITH expression to parse, go back to state=0
178 10
                    $state = 0;
179 10
                    continue;
180
                }
181
182
                if (
183 32
                    $token->type === TokenType::Keyword && (
184 32
                    $token->value === 'SELECT'
185 32
                    || $token->value === 'INSERT'
186 32
                    || $token->value === 'UPDATE'
187 32
                    || $token->value === 'DELETE'
188
                    )
189
                ) {
190 26
                    $state = 5;
191 26
                    --$list->idx;
192 26
                    continue;
193
                }
194
195 6
                $parser->error('An expression was expected.', $token);
196 6
                break;
197 26
            } elseif ($state === 5) {
198
                /**
199
                 * We need to parse all of the remaining tokens becuase mostly, they are only the CTE expression
200
                 * which's mostly is SELECT, or INSERT, UPDATE, or delete statement.
201
                 * e.g: INSERT .. ( SELECT 1 ) SELECT col1 FROM cte ON DUPLICATE KEY UPDATE col_name = 3.
202
                 * The issue is that, `ON DUPLICATE KEY UPDATE col_name = 3` is related to the main INSERT query
203
                 * not the cte expression (SELECT col1 FROM cte) we need to determine the end of the expression
204
                 * to parse `ON DUPLICATE KEY UPDATE` from the InsertStatement parser instead.
205
                 */
206
207
                // Index of the last parsed token by default would be the last token in the $list, because we're
208
                // assuming that all remaining tokens at state 4, are related to the expression.
209 26
                $idxOfLastParsedToken = $list->count - 1;
210
                // Index before search to be able to restore the index.
211 26
                $idxBeforeSearch = $list->idx;
212
                // Length of expression tokens is null by default, in order for the $subList to start
213
                // from $list->idx to the end of the $list.
214 26
                $lengthOfExpressionTokens = null;
215
216 26
                if ($list->getNextOfTypeAndValue(TokenType::Keyword, 'ON')) {
217
                    // (-1) because getNextOfTypeAndValue returned ON and increased the index.
218 4
                    $idxOfOn = $list->idx - 1;
219
                    // We want to make sure that it's `ON DUPLICATE KEY UPDATE`
220 4
                    $dubplicateToken = $list->getNext();
221 4
                    $keyToken = $list->getNext();
222 4
                    $updateToken = $list->getNext();
223
                    if (
224 4
                        $dubplicateToken && $dubplicateToken->keyword === 'DUPLICATE'
225 4
                        && ($keyToken && $keyToken->keyword === 'KEY')
226 4
                        && ($updateToken && $updateToken->keyword === 'UPDATE')
227
                    ) {
228
                        // Index of the last parsed token will be the token before the ON Keyword
229 2
                        $idxOfLastParsedToken = $idxOfOn - 1;
230
                        // The length of the expression tokens would be the difference
231
                        // between the first unrelated token `ON` and the idx
232
                        // before skipping the CTE tokens.
233 2
                        $lengthOfExpressionTokens = $idxOfOn - $idxBeforeSearch;
234
                    }
235
                }
236
237
                // Restore the index
238 26
                $list->idx = $idxBeforeSearch;
239
240 26
                $subList = new TokensList(array_slice($list->tokens, $list->idx, $lengthOfExpressionTokens));
241 26
                $subParser = new Parser($subList);
242 26
                if ($subParser->errors !== []) {
243 2
                    foreach ($subParser->errors as $error) {
244 2
                        $parser->errors[] = $error;
245
                    }
246
247 2
                    break;
248
                }
249
250 24
                $this->cteStatementParser = $subParser;
251
252 24
                $list->idx = $idxOfLastParsedToken;
253 24
                break;
254
            }
255
        }
256
257
        // 5 is the only valid end state
258 52
        if ($state !== 5) {
0 ignored issues
show
introduced by
The condition $state !== 5 is always true.
Loading history...
259
             /**
260
             * Token parsed at this moment.
261
             */
262 26
            $token = $list->tokens[$list->idx];
263
264 26
            $parser->error('Unexpected end of the WITH CTE.', $token);
265
        }
266
267 52
        --$list->idx;
268
    }
269
270 10
    public function build(): string
271
    {
272 10
        $str = 'WITH ';
273
274 10
        foreach ($this->withers as $wither) {
275 10
            $str .= $str === 'WITH ' ? '' : ', ';
276 10
            $str .= $wither->build();
277
        }
278
279 10
        $str .= ' ';
280
281 10
        if ($this->cteStatementParser) {
282 10
            foreach ($this->cteStatementParser->statements as $statement) {
283 10
                    $str .= $statement->build();
284
            }
285
        }
286
287 10
        return $str;
288
    }
289
290
    /**
291
     * Get tokens within the WITH expression to use them in another parser
292
     */
293 38
    private function getSubTokenList(TokensList $list): ParserException|TokensList
294
    {
295 38
        $idx = $list->idx;
296 38
        $token = $list->tokens[$list->idx];
297 38
        $openParenthesis = 0;
298
299 38
        while ($list->idx < $list->count) {
300 38
            if ($token->value === '(') {
301 4
                ++$openParenthesis;
302 38
            } elseif ($token->value === ')') {
303 38
                if (--$openParenthesis === -1) {
304 36
                    break;
305
                }
306
            }
307
308 38
            ++$list->idx;
309 38
            if (! isset($list->tokens[$list->idx])) {
310 2
                break;
311
            }
312
313 38
            $token = $list->tokens[$list->idx];
314
        }
315
316
        // performance improvement: return the error to avoid a try/catch in the loop
317 38
        if ($list->idx === $list->count) {
318 2
            --$list->idx;
319
320 2
            return new ParserException(
321 2
                Translator::gettext('A closing bracket was expected.'),
322 2
                $token,
323 2
            );
324
        }
325
326 36
        $length = $list->idx - $idx;
327
328 36
        return new TokensList(array_slice($list->tokens, $idx, $length));
329
    }
330
}
331