netdudes /
DataSourceryBundle
This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | |||
| 3 | namespace Netdudes\DataSourceryBundle\UQL; |
||
| 4 | |||
| 5 | use Netdudes\DataSourceryBundle\UQL\AST\ASTArray; |
||
| 6 | use Netdudes\DataSourceryBundle\UQL\AST\ASTAssertion; |
||
| 7 | use Netdudes\DataSourceryBundle\UQL\AST\ASTFunctionCall; |
||
| 8 | use Netdudes\DataSourceryBundle\UQL\AST\ASTGroup; |
||
| 9 | use Netdudes\DataSourceryBundle\UQL\Exception\Semantic\UqlUnexpectedEndOfExpressionException; |
||
| 10 | use Netdudes\DataSourceryBundle\UQL\Exception\Semantic\UqlUnexpectedTokenException; |
||
| 11 | use Netdudes\DataSourceryBundle\UQL\Exception\UQLSyntaxError; |
||
| 12 | |||
| 13 | /** |
||
| 14 | * Class Parser |
||
| 15 | * |
||
| 16 | * The parser translates a linear stream of tokens into a logical Abstract |
||
| 17 | * Syntax Tree (AST) that represents the logical structure of the language |
||
| 18 | * with independence of the actual end-objects (filters). |
||
| 19 | * |
||
| 20 | * @package Netdudes\DataSourceryBundle\UQL |
||
| 21 | */ |
||
| 22 | class Parser |
||
| 23 | { |
||
/*
 * Grammar (<identifier>s and <literal>s are simple scalars defined by regular expressions on the lexer):
 *
 * <operator>      ::= "<" | ">" | "<=" | ">=" | "!=" | "<>" | "="
 * <logic>         ::= "AND" | "OR"
 * <assertion>     ::= <identifier> <operator> <literal>
 * <concatenation> ::= <statement> { <logic> <statement> }
 * <group>         ::= "(" <concatenation> ")"
 * <statement>     ::= <assertion> | <group>
 * <query>         ::= <concatenation>
 *
 * Beyond the rules above, the matchers in this class also accept:
 *  - the T_OP_LIKE, T_OP_IN and T_OP_NIN operator tokens;
 *  - the T_LOGIC_XOR connector token;
 *  - on the right-hand side of an assertion, a T_FUNCTION_CALL token or
 *    (after IN / NOT IN only) an array of literals.
 */

/**
 * Position of the current token inside $tokenStream.
 *
 * Starts at -1: nextToken() increments the index before reading, so the
 * first call yields the token at offset 0.
 */
private $tokenIndex;

/**
 * Tokens being parsed. Each token is read as an array with a 'token'
 * (token type name) and a 'match' (matched value) entry.
 */
private $tokenStream = [];

/**
 * Creates a parser positioned before the first token of its (initially
 * empty) token stream.
 */
public function __construct()
{
    $this->tokenIndex = -1;
}
||
| 46 | |||
/**
 * Lexes a UQL expression and parses the resulting tokens into an AST.
 *
 * The token stream of this instance is replaced by the lexer output and
 * the token index is reset, so one parser instance can be reused for
 * several expressions.
 *
 * @param $string UQL expression; handed to Lexer::lex() as-is.
 *
 * @return ASTAssertion|ASTGroup Root of the syntax tree (see getAST()).
 */
public function parse($string)
{
    $this->tokenStream = Lexer::lex($string);
    $this->tokenIndex = -1;

    return $this->getAST();
}
||
| 61 | |||
/**
 * Entry point of the grammar: parses the current token stream as a
 * top-level <concatenation> and checks that no tokens are left over.
 *
 * @return ASTAssertion|ASTGroup A single statement, or an ASTGroup when
 *                               several statements are joined by a logic
 *                               operator.
 */
public function getAST()
{
    // The top-level syntax is a concatenation of statements joined by logic connectors.
    $ast = $this->matchConcatenation();

    // Any token remaining after the concatenation means the expression is malformed.
    if ($this->nextToken() !== false) {
        $this->throwUnexpectedTokenSyntaxError(['LOGIC'], "Logic operator or end of UQL expected after statement in first-level concatenation");
    }

    return $ast;
}
||
| 80 | |||
/**
 * Matches the upcoming tokens against the <concatenation> rule:
 * one statement, optionally followed by any number of
 * "<logic> <statement>" pairs that all use the same logic operator.
 *
 * A missing statement or a change of logic operator within the same
 * level is reported through the throw* helpers of this class.
 *
 * @return ASTAssertion|ASTGroup The lone statement when no logic operator
 *                               follows it; otherwise an ASTGroup built
 *                               from the operator token type and all
 *                               matched statements.
 */
public function matchConcatenation()
{
    $statement = $this->matchStatement();
    if ($statement === false) {
        $this->throwUnexpectedTokenSyntaxError(['IDENTIFIER', 'GROUP_START'], 'Expected statement at beginning of concatenation.');
    }

    $connector = $this->matchLogic();
    if ($connector === false) {
        // No logic operator follows: this is a single statement, not a concatenation.
        return $statement;
    }

    $members = [$statement];

    // Each iteration consumes one "<logic> <statement>" pair; the loop ends
    // when the statement is no longer followed by a logic operator.
    for ($logic = $connector; $logic !== false; $logic = $this->matchLogic()) {
        if ($logic['token'] != $connector['token']) {
            $this->throwSyntaxError('Can\'t mix ORs and ANDs in same-level expression, ambiguous statement.');
        }

        $next = $this->matchStatement();
        if ($next === false) {
            $this->throwUnexpectedTokenSyntaxError(['IDENTIFIER', 'GROUP_START'], 'Expected statement after logic operator');
        }

        $members[] = $next;
    }

    return new ASTGroup($connector['token'], $members);
}
||
| 120 | |||
/**
 * Matches the upcoming tokens against the <statement> rule by trying a
 * <group> first and an <assertion> second.
 *
 * NOTE(review): matchGroup() and matchAssertion() each restore the token
 * index themselves when they do not match, so the additional rewind below
 * appears to leave the index one position before where this method
 * started. Confirm whether callers (and the reported error position)
 * rely on that.
 *
 * @return bool|ASTAssertion|ASTGroup The matched node, or false when the
 *                                    tokens start neither a group nor an
 *                                    assertion.
 */
public function matchStatement()
{
    // A <group> takes precedence; fall back to an <assertion>.
    $statement = $this->matchGroup();
    if ($statement === false) {
        $statement = $this->matchAssertion();
    }

    if ($statement !== false) {
        return $statement;
    }

    // Neither alternative matched.
    $this->rewindToken();

    return false;
}
||
| 147 | |||
/**
 * Matches the upcoming tokens against the <group> rule:
 * "(" <concatenation> ")".
 *
 * The group itself produces no AST node of its own; the node of the
 * enclosed concatenation is returned directly.
 *
 * A missing closing bracket (including running out of tokens) is reported
 * through throwUnexpectedTokenSyntaxError().
 *
 * @return bool|ASTAssertion|ASTGroup The enclosed concatenation, or false
 *                                    (with the token index restored) when
 *                                    the next token is not an opening
 *                                    bracket.
 */
public function matchGroup()
{
    $opening = $this->nextToken();

    // nextToken() yields false at the end of the stream; check for that
    // before reading the token type so no array access happens on a boolean.
    if ($opening === false || $opening['token'] !== "T_BRACKET_OPEN") {
        $this->rewindToken();

        return false;
    }

    // The interior of a group is a <concatenation>.
    $concatenation = $this->matchConcatenation();

    // The group must be closed; anything else is a syntax error.
    $closing = $this->nextToken();
    if ($closing === false || $closing['token'] !== "T_BRACKET_CLOSE") {
        $this->throwUnexpectedTokenSyntaxError(['GROUP_END'], 'Expected closing bracket.');
    }

    return $concatenation;
}
||
| 176 | |||
/**
 * Matches the upcoming tokens against the <assertion> rule: an identifier,
 * a comparison operator and a right-hand side.
 *
 * The right-hand side may be an array (only after the T_OP_IN / T_OP_NIN
 * operators), a function call token, or a literal token (any token type
 * starting with "T_LITERAL").
 *
 * A missing operator or an invalid right-hand side (including running out
 * of tokens) is reported through throwUnexpectedTokenSyntaxError().
 *
 * @throws UQLSyntaxError When an array follows an operator other than
 *                        IN / NOT IN.
 *
 * @return bool|ASTAssertion The assertion node, or false (with the token
 *                           index restored) when the next token is not an
 *                           identifier.
 */
public function matchAssertion()
{
    $identifier = $this->nextToken();

    // nextToken() yields false at the end of the stream; guard before indexing.
    if ($identifier === false || $identifier['token'] !== 'T_IDENTIFIER') {
        // If a stream doesn't start with an identifier, it's not an <assertion>.
        $this->rewindToken();

        return false;
    }

    $operator = $this->matchOperator();
    if ($operator === false) {
        // matchOperator() rewound the index; step forward again so the
        // error is reported against the token found in place of the operator.
        $this->nextToken();
        $this->throwUnexpectedTokenSyntaxError(['OPERATOR'], 'Comparison operator expected after identifier');
    }

    $array = $this->matchArray();
    if ($array !== false) {
        if (!in_array($operator['token'], ['T_OP_IN', 'T_OP_NIN'], true)) {
            throw new UQLSyntaxError("Arrays are only valid after IN or NOT IN operators");
        }

        return new ASTAssertion($identifier['match'], $operator['token'], $array);
    }

    $literal = $this->nextToken();

    if ($literal !== false && $literal['token'] === 'T_FUNCTION_CALL') {
        return new ASTAssertion($identifier['match'], $operator['token'], ASTFunctionCall::createFromExpression($literal['match']));
    }

    // Either the expression ended right after the operator, or the token is
    // not one of the literal token types.
    if ($literal === false || strpos($literal['token'], 'T_LITERAL') !== 0) {
        $this->throwUnexpectedTokenSyntaxError(['ARRAY_START', 'LITERAL'], 'Array, value or function call expected after comparison operator');
    }

    // Literal subtypes (true / false / empty) are converted to PHP values.
    $literal = $this->transformLiteral($literal);

    return new ASTAssertion($identifier['match'], $operator['token'], $literal['match']);
}
||
| 223 | |||
/**
 * Matches the next token against the comparison operators accepted in an
 * assertion.
 *
 * @return array|bool The operator token (array with 'token' and 'match'
 *                    entries), or false (with the token index restored)
 *                    when the next token is not an operator or the stream
 *                    has ended.
 */
public function matchOperator()
{
    $operatorTypes = [
        'T_OP_NEQ',
        'T_OP_LTE',
        'T_OP_LT',
        'T_OP_GTE',
        'T_OP_GT',
        'T_OP_EQ',
        'T_OP_LIKE',
        'T_OP_IN',
        'T_OP_NIN',
    ];

    $operator = $this->nextToken();

    // nextToken() yields false at the end of the stream; guard before indexing.
    if ($operator !== false && in_array($operator['token'], $operatorTypes, true)) {
        return $operator;
    }

    $this->rewindToken();

    return false;
}
||
| 251 | |||
/**
 * Matches the upcoming tokens against an array: an opening array token,
 * zero or more T_LITERAL tokens separated by array separators, and a
 * closing array token.
 *
 * Every element, including the first one, must be a T_LITERAL token.
 * A non-literal element or a missing closing token (including running out
 * of tokens) is reported through throwUnexpectedTokenSyntaxError().
 *
 * @return bool|ASTArray The array node (built without arguments for an
 *                       empty array), or false (with the token index
 *                       restored) when the next token does not open an
 *                       array.
 */
public function matchArray()
{
    $opening = $this->nextToken();

    // nextToken() yields false at the end of the stream; guard before indexing.
    if ($opening === false || $opening['token'] !== "T_ARRAY_OPEN") {
        $this->rewindToken();

        return false;
    }

    $element = $this->nextToken();
    if ($element !== false && $element['token'] === "T_ARRAY_CLOSE") {
        // Empty array
        return new ASTArray();
    }

    $elements = [];
    while (true) {
        // Applies to the first element as well as to every element that
        // follows a separator.
        if ($element === false || $element['token'] !== 'T_LITERAL') {
            $this->throwUnexpectedTokenSyntaxError(['LITERAL'], "An array must consist of literals");
        }
        $elements[] = $element['match'];

        $delimiter = $this->nextToken();
        if ($delimiter === false || $delimiter['token'] !== "T_ARRAY_SEPARATOR") {
            break;
        }

        $element = $this->nextToken();
    }

    if ($delimiter === false || $delimiter['token'] !== 'T_ARRAY_CLOSE') {
        // Unterminated array
        $this->throwUnexpectedTokenSyntaxError(['ARRAY_END'], "An array must end with ']'.");
    }

    return new ASTArray($elements);
}
||
| 284 | |||
/**
 * Matches the next token against the logic connectors
 * (T_LOGIC_AND, T_LOGIC_OR, T_LOGIC_XOR).
 *
 * @return array|bool The logic token (array with 'token' and 'match'
 *                    entries), or false (with the token index restored)
 *                    when the next token is not a logic connector or the
 *                    stream has ended.
 */
public function matchLogic()
{
    $token = $this->nextToken();

    // nextToken() yields false at the end of the stream; guard before indexing.
    if ($token !== false && in_array($token['token'], ['T_LOGIC_AND', 'T_LOGIC_OR', 'T_LOGIC_XOR'], true)) {
        return $token;
    }

    // None found
    $this->rewindToken();

    return false;
}
||
| 303 | |||
/**
 * Returns the token stream currently held by the parser.
 *
 * @return array Tokens as set by parse() or setTokenStream(); an empty
 *               array on a fresh instance.
 */
public function getTokenStream()
{
    return $this->tokenStream;
}
||
| 311 | |||
/**
 * Replaces the token stream to be parsed.
 *
 * The value is stored without validation and the token index is left
 * untouched. The matchers read every token as an array with 'token' and
 * 'match' entries, so the stream is expected to be an array of such
 * tokens.
 *
 * @param array $tokenStream Tokens to parse.
 */
public function setTokenStream($tokenStream)
{
    $this->tokenStream = $tokenStream;
}
||
| 319 | |||
/**
 * Returns the index of the current token within the token stream.
 *
 * @return int -1 on a fresh instance and right after parse() resets the
 *             parser, i.e. before any token has been read.
 */
public function getTokenIndex()
{
    return $this->tokenIndex;
}
||
| 327 | |||
/**
 * Moves the parser to the given position in the token stream.
 *
 * The value is stored without validation. Since nextToken() increments
 * the index before reading, passing -1 positions the parser before the
 * first token.
 *
 * @param int $tokenIndex New index of the current token.
 */
public function setTokenIndex($tokenIndex)
{
    $this->tokenIndex = $tokenIndex;
}
||
| 335 | |||
/**
 * Advances the token index by one and returns the token found there.
 *
 * The index is advanced even when it moves past the end of the stream.
 *
 * @return array|bool The token at the new index, or false when the stream
 *                    has no token at that position.
 */
private function nextToken()
{
    ++$this->tokenIndex;

    return $this->currentToken();
}
||
| 347 | |||
/**
 * Returns the token at the current index without moving the index.
 *
 * @return array|bool The current token, or false when the index does not
 *                    point at a token of the stream.
 */
private function currentToken()
{
    if (isset($this->tokenStream[$this->tokenIndex])) {
        return $this->tokenStream[$this->tokenIndex];
    }

    return false;
}
||
| 357 | |||
/**
 * Steps the token index back by one position, undoing one nextToken() call.
 */
private function rewindToken()
{
    --$this->tokenIndex;
}
||
| 365 | |||
/**
 * Helper method. Always throws: reports that the token at the current
 * index (or the end of the expression) is not what the grammar expects.
 *
 * The exception receives the tokens from the start of the stream up to
 * and including the current index.
 *
 * @param array       $expectedTokenCategories Names of the token categories that would have been valid here.
 * @param string|null $message                 Optional human-readable explanation.
 *
 * @throws UqlUnexpectedEndOfExpressionException When there is no token at the current index.
 * @throws UqlUnexpectedTokenException           When there is a token at the current index.
 */
private function throwUnexpectedTokenSyntaxError(array $expectedTokenCategories, $message = null)
{
    // The index can drop below -1 after rewinds. A negative length would make
    // array_slice() cut relative to the END of the stream, so clamp it to 0.
    $parsedLength = max(0, $this->tokenIndex + 1);
    $parsedTokenStream = array_slice($this->tokenStream, 0, $parsedLength);

    $token = $this->currentToken();
    if ($token === false) {
        throw new UqlUnexpectedEndOfExpressionException(
            $expectedTokenCategories,
            $parsedTokenStream,
            $message
        );
    }

    throw new UqlUnexpectedTokenException(
        $token['token'],
        $token['match'],
        $expectedTokenCategories,
        $parsedTokenStream,
        $message
    );
}
||
| 390 | |||
/**
 * Helper method. Always throws a generic syntax error whose message is
 * the given text prefixed with "Syntax error: ".
 *
 * @param string $message Description of the problem.
 *
 * @throws UQLSyntaxError
 */
private function throwSyntaxError($message)
{
    $fullMessage = 'Syntax error: ' . $message;

    throw new UQLSyntaxError($fullMessage);
}
||
| 395 | |||
/**
 * Normalises a literal token into a plain T_LITERAL token.
 *
 * The literal subtypes are converted to PHP values: T_LITERAL_FALSE
 * becomes false, T_LITERAL_TRUE becomes true and T_LITERAL_EMPTY becomes
 * null. Any other token keeps its original 'match' value.
 *
 * @param array $literal Token with 'token' and 'match' entries.
 *
 * @return array Token of type 'T_LITERAL' carrying the converted value in
 *               its 'match' entry.
 */
private function transformLiteral($literal)
{
    $type = $literal['token'];

    if ($type == 'T_LITERAL_FALSE') {
        $value = false;
    } elseif ($type == 'T_LITERAL_TRUE') {
        $value = true;
    } elseif ($type == 'T_LITERAL_EMPTY') {
        $value = null;
    } else {
        // Plain literals are passed through unchanged.
        $value = $literal['match'];
    }

    return [
        'token' => 'T_LITERAL',
        'match' => $value
    ];
}
||
| 425 | } |
||
| 426 |
The break statement is not necessary if it is preceded, for example, by a return statement:
If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.