Parser::parse()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 7
c 0
b 0
f 0
rs 9.4285
cc 1
eloc 4
nc 1
nop 1
1
<?php
2
3
namespace Netdudes\DataSourceryBundle\UQL;
4
5
use Netdudes\DataSourceryBundle\UQL\AST\ASTArray;
6
use Netdudes\DataSourceryBundle\UQL\AST\ASTAssertion;
7
use Netdudes\DataSourceryBundle\UQL\AST\ASTFunctionCall;
8
use Netdudes\DataSourceryBundle\UQL\AST\ASTGroup;
9
use Netdudes\DataSourceryBundle\UQL\Exception\Semantic\UqlUnexpectedEndOfExpressionException;
10
use Netdudes\DataSourceryBundle\UQL\Exception\Semantic\UqlUnexpectedTokenException;
11
use Netdudes\DataSourceryBundle\UQL\Exception\UQLSyntaxError;
12
13
/**
14
 * Class Parser
15
 *
16
 * The parser translates a linear stream of tokens into a logical Abstract
17
 * Syntax Tree (AST) that represents the logical structure of the language
18
 * with independence of the actual end-objects (filters).
19
 *
20
 * @package Netdudes\DataSourceryBundle\UQL
21
 */
22
class Parser
23
{
24
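    /**
     *
     * Grammar (<identifier>s and <literal>s are simple scalars defined by regular expressions on the lexer):
     *
     * <operator>       ::=     "<" | ">" | "<=" | ">=" | "!=" | "<>" | "="
     * <logic>          ::=     "AND" | "OR"
     * <assertion>      ::=     <identifier> <operator> <literal>
     * <concatenation>  ::=     <statement> { <logic> <statement> }
     * <group>          ::=     "(" <concatenation> ")"
     * <statement>      ::=     <assertion> | <group>
     * <query>          ::=     <concatenation>
     *
     * Note: this summary is narrower than what the parser accepts. The parser
     * also recognises the T_OP_LIKE, T_OP_IN and T_OP_NIN operator tokens, the
     * T_LOGIC_XOR connector and, on the right-hand side of an assertion, a
     * function call (T_FUNCTION_CALL) or an array (T_ARRAY_OPEN ... T_ARRAY_CLOSE,
     * elements separated by T_ARRAY_SEPARATOR; arrays are only valid after
     * T_OP_IN / T_OP_NIN). The lexemes of these tokens are defined by the lexer.
     *
     */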
37
38
    /**
     * Index into $tokenStream of the most recently consumed token.
     * A value of -1 means that no token has been consumed yet.
     *
     * @var int
     */
    private $tokenIndex;

    /**
     * The tokens being parsed. Each token is read through its 'token'
     * (token type) and 'match' (matched text) keys.
     *
     * @var array
     */
    private $tokenStream = [];

    /**
     * Creates a parser with an empty token stream, positioned before the first token.
     */
    public function __construct()
    {
        $this->tokenIndex = -1;
    }
46
47
    /**
     * Lexes the given expression, resets the parser position and returns the
     * resulting AST.
     *
     * @param mixed $string The UQL source, handed as-is to Lexer::lex() (presumably a string).
     *
     * @return ASTAssertion|ASTGroup The root of the syntax tree, as produced by getAST().
     */
    public function parse($string)
    {
        $tokens = Lexer::lex($string);

        // Start over: install the fresh stream and place the cursor before its first token.
        $this->tokenStream = $tokens;
        $this->tokenIndex = -1;

        return $this->getAST();
    }
61
62
    /**
     * Entry point of the grammar parsing: matches a top-level <concatenation>
     * and verifies that it consumes the whole token stream.
     *
     * @throws UqlUnexpectedTokenException If tokens remain after the top-level concatenation.
     *
     * @return ASTAssertion|ASTGroup
     */
    public function getAST()
    {
        // A query is, in general, a concatenation of statements joined by logic connectors.
        $ast = $this->matchConcatenation();

        // Anything left over after the concatenation is a syntax error.
        if ($this->nextToken() !== false) {
            $this->throwUnexpectedTokenSyntaxError(['LOGIC'], "Logic operator or end of UQL expected after statement in first-level concatenation");
        }

        return $ast;
    }
80
81
    /**
     * Tries to match the following tokens to a <concatenation> grammar.
     *
     * A concatenation is one <statement>, optionally followed by any number of
     * "<logic> <statement>" pairs. Every logic connector on the same level
     * must be the same token; mixing them is rejected as ambiguous.
     *
     * @throws UqlUnexpectedTokenException           If a statement is expected but another token is found.
     * @throws UqlUnexpectedEndOfExpressionException If a statement is expected but the token stream ends.
     * @throws UQLSyntaxError                        If different logic connectors are mixed on one level.
     *
     * @return ASTAssertion|ASTGroup The lone statement when no connector follows it, otherwise an
     *                               ASTGroup built from the connector token and all matched statements.
     */
    public function matchConcatenation()
    {
        $firstStatement = $this->matchStatement();

        if ($firstStatement === false) {
            // matchStatement() leaves the position untouched on failure; step onto
            // the offending token so that the error reports it (or end of input).
            $this->nextToken();
            $this->throwUnexpectedTokenSyntaxError(['IDENTIFIER', 'GROUP_START'], 'Expected statement at beginning of concatenation.');
        }

        $elements = [$firstStatement];

        $firstLogic = $this->matchLogic();
        if ($firstLogic === false) {
            // There is no actual concatenation. This is a single statement. Return as such.
            return $firstStatement;
        }

        // While there are concatenating logic operators, keep adding elements.
        $logic = $firstLogic;
        while ($logic !== false) {
            if ($logic['token'] !== $firstLogic['token']) {
                $this->throwSyntaxError('Can\'t mix ORs and ANDs in same-level expression, ambiguous statement.');
            }

            $statement = $this->matchStatement();
            if ($statement === false) {
                // Same as above: point the error at the token that is not a statement.
                $this->nextToken();
                $this->throwUnexpectedTokenSyntaxError(['IDENTIFIER', 'GROUP_START'], 'Expected statement after logic operator');
            }

            $elements[] = $statement;
            $logic = $this->matchLogic();
        }

        return new ASTGroup($firstLogic['token'], $elements);
    }

    /**
     * Tries to match a general <statement>, that is a <group> or an <assertion>.
     *
     * When neither alternative matches, the token index is left exactly where
     * it was before the call: matchGroup() and matchAssertion() each rewind
     * the single token they inspected, so no further rewind is needed here.
     *
     * @return bool|ASTAssertion|ASTGroup The matched node, or false when the next token starts neither.
     */
    public function matchStatement()
    {
        // Try <group>
        $matchGroup = $this->matchGroup();
        if ($matchGroup !== false) {
            return $matchGroup;
        }

        // Try <assertion>
        $matchAssertion = $this->matchAssertion();
        if ($matchAssertion !== false) {
            return $matchAssertion;
        }

        // None found. Both attempts above have already restored the position.
        return false;
    }
147
148
    /**
     * Tries to match a <group> grammar to the following tokens.
     *
     * A group is a <concatenation> enclosed between a T_BRACKET_OPEN and a
     * T_BRACKET_CLOSE token. If the next token is not an opening bracket (or
     * the stream has ended) the position is restored and false is returned.
     *
     * @throws UqlUnexpectedTokenException           If the group is not closed by T_BRACKET_CLOSE.
     * @throws UqlUnexpectedEndOfExpressionException If the stream ends before the closing bracket.
     *
     * @return bool|ASTAssertion|ASTGroup The inner concatenation, or false when no group starts here.
     */
    public function matchGroup()
    {
        $token = $this->nextToken();

        // Check for the open parenthesis. nextToken() yields false at the end of
        // the stream, which must not be indexed like a token.
        if ($token === false || $token['token'] !== 'T_BRACKET_OPEN') {
            $this->rewindToken();

            return false;
        }

        // The interior of a group is a <concatenation>
        $concatenation = $this->matchConcatenation();

        $token = $this->nextToken();

        // Check for closed parenthesis. Mismatch (or end of input) is a Syntax Error.
        if ($token === false || $token['token'] !== 'T_BRACKET_CLOSE') {
            $this->throwUnexpectedTokenSyntaxError(['GROUP_END'], 'Expected closing bracket.');
        }

        return $concatenation;
    }
176
177
    /**
     * Tries to match the following tokens to an <assertion>.
     *
     * An assertion is an identifier, a comparison operator and a right-hand
     * side that is an array, a function call or a literal. If the next token
     * is not a T_IDENTIFIER (or the stream has ended) the position is restored
     * and false is returned; any later mismatch is a syntax error.
     *
     * @throws UQLSyntaxError                        If an array follows an operator other than T_OP_IN / T_OP_NIN.
     * @throws UqlUnexpectedTokenException           If the operator or the right-hand side is not a valid token.
     * @throws UqlUnexpectedEndOfExpressionException If the stream ends in the middle of the assertion.
     *
     * @return bool|ASTAssertion
     */
    public function matchAssertion()
    {
        $identifier = $this->nextToken();

        // If a stream doesn't start with an identifier (or is exhausted), it's not an <assertion>.
        if ($identifier === false || $identifier['token'] !== 'T_IDENTIFIER') {
            $this->rewindToken();

            return false;
        }

        $operator = $this->matchOperator();

        if ($operator === false) {
            // matchOperator() rewound the index; step back onto the offending token for the error report.
            $this->nextToken();
            $this->throwUnexpectedTokenSyntaxError(['OPERATOR'], 'Comparison operator expected after identifier');
        }

        // Right-hand side, first alternative: an array.
        $array = $this->matchArray();
        if ($array !== false) {
            if (!in_array($operator['token'], ['T_OP_IN', 'T_OP_NIN'], true)) {
                throw new UQLSyntaxError("Arrays are only valid after IN or NOT IN operators");
            }

            return new ASTAssertion($identifier['match'], $operator['token'], $array);
        }

        $literal = $this->nextToken();

        // Second alternative: a function call.
        if ($literal !== false && $literal['token'] === 'T_FUNCTION_CALL') {
            return new ASTAssertion($identifier['match'], $operator['token'], ASTFunctionCall::createFromExpression($literal['match']));
        }

        // Third alternative: any token whose type starts with T_LITERAL. End of
        // input is handled explicitly so that false is never indexed.
        if ($literal === false || strpos($literal['token'], 'T_LITERAL') !== 0) {
            $this->throwUnexpectedTokenSyntaxError(['ARRAY_START', 'LITERAL'], 'Array, value or function call expected after comparison operator');
        }

        // Literal subtypes (true / false / empty) are normalised to their PHP values.
        $literal = $this->transformLiteral($literal);

        return new ASTAssertion($identifier['match'], $operator['token'], $literal['match']);
    }
223
224
    /**
     * Tries to match the next token to an <operator>.
     *
     * On success the operator token is consumed and returned. Otherwise
     * (including at the end of the stream) the position is restored.
     *
     * @return array|bool The operator token, or false if the next token is not a comparison operator.
     */
    public function matchOperator()
    {
        $comparisonOperators = [
            'T_OP_NEQ',
            'T_OP_LTE',
            'T_OP_LT',
            'T_OP_GTE',
            'T_OP_GT',
            'T_OP_EQ',
            'T_OP_LIKE',
            'T_OP_IN',
            'T_OP_NIN',
        ];

        $operator = $this->nextToken();

        // nextToken() yields false once the stream is exhausted; never index that.
        if ($operator !== false && in_array($operator['token'], $comparisonOperators, true)) {
            return $operator;
        }

        $this->rewindToken();

        return false;
    }
251
252
    /**
     * Tries to match the following tokens to an array: T_ARRAY_OPEN, an
     * optional list of elements separated by T_ARRAY_SEPARATOR, T_ARRAY_CLOSE.
     *
     * If the next token is not T_ARRAY_OPEN (or the stream has ended) the
     * position is restored and false is returned.
     *
     * @throws UqlUnexpectedTokenException           If an element after a separator is not a T_LITERAL,
     *                                               or the array is not closed by T_ARRAY_CLOSE.
     * @throws UqlUnexpectedEndOfExpressionException If the stream ends inside the array.
     *
     * @return bool|ASTArray
     */
    public function matchArray()
    {
        $token = $this->nextToken();
        if ($token === false || $token['token'] !== 'T_ARRAY_OPEN') {
            $this->rewindToken();

            return false;
        }

        $element = $this->nextToken();
        if ($element === false) {
            // The stream ended right after the opening bracket: unterminated array.
            $this->throwUnexpectedTokenSyntaxError(['ARRAY_END'], "An array must end with ']'.");
        }
        if ($element['token'] === 'T_ARRAY_CLOSE') {
            // Empty array
            return new ASTArray();
        }

        // NOTE(review): unlike the following elements, the first element's token
        // type is not validated here. This looks like an oversight, but tightening
        // it could reject input that is accepted today. Confirm before changing.
        $elements = [$element['match']];

        $comma = $this->nextToken();
        while ($comma !== false && $comma['token'] === 'T_ARRAY_SEPARATOR') {
            $element = $this->nextToken();
            if ($element === false || $element['token'] !== 'T_LITERAL') {
                $this->throwUnexpectedTokenSyntaxError(['LITERAL'], "An array must consist of literals");
            }
            $elements[] = $element['match'];
            $comma = $this->nextToken();
        }

        if ($comma === false || $comma['token'] !== 'T_ARRAY_CLOSE') {
            // Unterminated array
            $this->throwUnexpectedTokenSyntaxError(['ARRAY_END'], "An array must end with ']'.");
        }

        return new ASTArray($elements);
    }
284
285
    /**
     * Tries to match the next token to a <logic> operator
     * (T_LOGIC_AND, T_LOGIC_OR or T_LOGIC_XOR).
     *
     * On success the token is consumed and returned. Otherwise (including at
     * the end of the stream) the position is restored.
     *
     * @return array|bool The logic token, or false if the next token is not a logic operator.
     */
    public function matchLogic()
    {
        $token = $this->nextToken();

        // nextToken() yields false once the stream is exhausted; never index that.
        if ($token !== false && in_array($token['token'], ['T_LOGIC_AND', 'T_LOGIC_OR', 'T_LOGIC_XOR'], true)) {
            return $token;
        }

        // None found
        $this->rewindToken();

        return false;
    }
303
304
    /**
     * Returns the token stream currently held by the parser.
     *
     * @return array
     */
    public function getTokenStream()
    {
        return $this->tokenStream;
    }
311
312
    /**
     * Replaces the token stream to be parsed. The token index is not touched.
     *
     * @param array $tokenStream Tokens, each read through its 'token' and 'match' keys.
     */
    public function setTokenStream($tokenStream)
    {
        $this->tokenStream = $tokenStream;
    }
319
320
    /**
     * Returns the index of the most recently consumed token.
     *
     * @return int
     */
    public function getTokenIndex()
    {
        return $this->tokenIndex;
    }
327
328
    /**
     * Moves the parser to the given position in the token stream.
     *
     * @param int $tokenIndex Index of the token to treat as most recently consumed.
     */
    public function setTokenIndex($tokenIndex)
    {
        $this->tokenIndex = $tokenIndex;
    }
335
336
    /**
     * Advances the token index by one and returns the token found there.
     *
     * @return array|bool The token at the new position, or false when the stream has no token there.
     */
    private function nextToken()
    {
        ++$this->tokenIndex;

        return $this->currentToken();
    }
347
348
    /**
     * Returns the token at the current index, without advancing it.
     *
     * @return array|bool The current token, or false when no token is set at the current index.
     */
    private function currentToken()
    {
        if (isset($this->tokenStream[$this->tokenIndex])) {
            return $this->tokenStream[$this->tokenIndex];
        }

        return false;
    }
357
358
    /**
     * Steps the token index back by one position.
     */
    private function rewindToken()
    {
        --$this->tokenIndex;
    }
365
366
    /**
     * Helper method. Always throws an exception describing a syntax error at
     * the current token position.
     *
     * @param array       $expectedTokenCategories Categories of tokens that would have been valid here.
     * @param string|null $message                 Optional human-readable explanation.
     *
     * @throws UqlUnexpectedEndOfExpressionException If there is no token at the current position.
     * @throws UqlUnexpectedTokenException           If there is a token at the current position.
     */
    private function throwUnexpectedTokenSyntaxError(array $expectedTokenCategories, $message = null)
    {
        // Tokens up to and including the current one. The length is clamped at
        // zero because a negative length would make array_slice() cut tokens
        // off the END of the stream instead of returning an empty prefix.
        $parsedTokenStream = array_slice($this->tokenStream, 0, max(0, $this->tokenIndex + 1));

        $token = $this->currentToken();

        if ($token === false) {
            throw new UqlUnexpectedEndOfExpressionException(
                $expectedTokenCategories,
                $parsedTokenStream,
                $message
            );
        }

        throw new UqlUnexpectedTokenException(
            $token['token'],
            $token['match'],
            $expectedTokenCategories,
            $parsedTokenStream,
            $message
        );
    }
390
391
    /**
     * Helper method. Always throws a generic syntax error whose message is
     * the given text prefixed with "Syntax error: ".
     *
     * @param string $message Description of the problem.
     *
     * @throws UQLSyntaxError
     */
    private function throwSyntaxError($message)
    {
        $fullMessage = 'Syntax error: ' . $message;

        throw new UQLSyntaxError($fullMessage);
    }
395
396
    /**
     * Normalises a literal token into a plain T_LITERAL token.
     *
     * The subtypes T_LITERAL_FALSE, T_LITERAL_TRUE and T_LITERAL_EMPTY are
     * mapped to the PHP values false, true and null respectively; for any
     * other token type the original match is kept.
     *
     * @param array $literal Token with 'token' and 'match' keys.
     *
     * @return array A token of type 'T_LITERAL' whose 'match' holds the normalised value.
     */
    private function transformLiteral($literal)
    {
        $type = $literal['token'];

        // Loose comparison on purpose: it mirrors switch semantics.
        if ($type == 'T_LITERAL_FALSE') {
            $value = false;
        } elseif ($type == 'T_LITERAL_TRUE') {
            $value = true;
        } elseif ($type == 'T_LITERAL_EMPTY') {
            $value = null;
        } else {
            $value = $literal['match'];
        }

        return [
            'token' => 'T_LITERAL',
            'match' => $value
        ];
    }
425
}
426