Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems, and corresponding solutions are:
Complex classes like Lexer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Lexer, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
42 | class Lexer extends Core |
||
43 | { |
||
/**
 * A list of methods that are used in lexing the SQL query.
 *
 * At every position of the input, `lex()` tries these methods in the order
 * given here and keeps the token of the first one that returns one.
 *
 * @var array
 */
public static $PARSER_METHODS = array(
    // It is best to put the parsers in order of their complexity
    // (ascending) and their occurrence rate (descending).
    //
    // Conflicts:
    //
    // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
    // They fight over delimiter. The delimiter may be a keyword, a
    // number or almost any character which makes the delimiter one of
    // the first tokens that must be parsed.
    //
    // 2. `parseNumber` and `parseOperator`
    // They fight over `+` and `-`.
    //
    // 3. `parseComment` and `parseOperator`
    // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
    //
    // 4. `parseBool` and `parseKeyword`
    // They fight over `TRUE` and `FALSE`.
    //
    // 5. `parseKeyword` and `parseUnknown`
    // They fight over words. `parseUnknown` does not know about
    // keywords.

    'parseDelimiter', 'parseWhitespace', 'parseNumber',
    'parseComment', 'parseOperator', 'parseBool', 'parseString',
    'parseSymbol', 'parseKeyword', 'parseLabel', 'parseUnknown',
);
||
77 | |||
/**
 * The string to be parsed.
 *
 * The constructor may replace a plain string with a `UtfString` instance.
 *
 * @var string|UtfString
 */
public $str = '';

/**
 * The length of `$str`.
 *
 * By storing its length, a lot of time is saved, because parsing methods
 * would otherwise call `strlen` every time.
 *
 * @var int
 */
public $len = 0;

/**
 * The index of the last parsed character.
 *
 * Every `parse*` method reads from this index and leaves it on the last
 * character of the token it returns.
 *
 * @var int
 */
public $last = 0;

/**
 * Tokens extracted from given strings.
 *
 * Assigned at the end of `lex()`.
 *
 * @var TokensList
 */
public $list;

/**
 * The default delimiter. This is used, by default, in all new instances.
 *
 * @var string
 */
public static $DEFAULT_DELIMITER = ';';

/**
 * Statements delimiter.
 * This may change during lexing (see the `DELIMITER` handling in `lex()`).
 *
 * @var string
 */
public $delimiter;

/**
 * The length of the delimiter.
 *
 * Because `parseDelimiter` can be called a lot, it would perform a lot of
 * calls to `strlen`, which might affect performance when the delimiter is
 * big.
 *
 * @var int
 */
public $delimiterLen;
||
134 | |||
135 | /** |
||
136 | * Gets the tokens list parsed by a new instance of a lexer. |
||
137 | * |
||
138 | * @param string|UtfString $str the query to be lexed |
||
139 | * @param bool $strict whether strict mode should be |
||
140 | * enabled or not |
||
141 | * @param string $delimiter the delimiter to be used |
||
|
|||
142 | * |
||
143 | * @return TokensList |
||
144 | */ |
||
145 | 1 | public static function getTokens($str, $strict = false, $delimiter = null) |
|
151 | |||
/**
 * Constructor.
 *
 * Stores the query (wrapping it in a `UtfString` when required), records
 * its length and the strict flag, sets the delimiter and immediately lexes
 * the query.
 *
 * @param string|UtfString $str       the query to be lexed
 * @param bool             $strict    whether strict mode should be
 *                                    enabled or not
 * @param string           $delimiter the delimiter to be used; when it is
 *                                    `null` or empty,
 *                                    `static::$DEFAULT_DELIMITER` is used
 */
public function __construct($str, $strict = false, $delimiter = null)
{
    // `strlen` is used instead of `mb_strlen` because the lexer needs to
    // parse each byte of the input.
    $len = $str instanceof UtfString ? $str->length() : strlen($str);

    // For multi-byte strings, a new instance of `UtfString` is
    // initialized (only if `UtfString` usage is forced).
    if (!$str instanceof UtfString && USE_UTF_STRINGS && $len !== mb_strlen($str, 'UTF-8')) {
        $str = new UtfString($str);
    }

    $this->str = $str;
    $this->len = $str instanceof UtfString ? $str->length() : $len;

    $this->strict = $strict;

    // Setting the delimiter.
    // `empty('0')` is true in PHP, so the string `'0'` has to be let
    // through explicitly; every other empty value still selects the
    // default delimiter.
    $useDefault = empty($delimiter) && $delimiter !== '0';
    $this->setDelimiter(
        $useDefault ? static::$DEFAULT_DELIMITER : $delimiter
    );

    $this->lex();
}
|
184 | |||
185 | /** |
||
186 | * Sets the delimiter. |
||
187 | * |
||
188 | * @param string $delimiter the new delimiter |
||
189 | */ |
||
190 | 379 | public function setDelimiter($delimiter) |
|
195 | |||
/**
 * Parses the string and extracts lexemes.
 *
 * Walks `$this->str` from the first to the last character. At every
 * position the methods listed in `static::$PARSER_METHODS` are tried in
 * order and the first token returned is kept; a character no parser
 * accepts is reported as an error and wrapped in a plain token.
 *
 * A few contextual fix-ups are applied afterwards: a variable symbol that
 * follows a string or a backtick symbol is merged into a user symbol, a
 * keyword following the `.` operator is demoted to a plain token, and the
 * `DELIMITER` word switches the delimiter used from then on.
 *
 * The resulting list, terminated by an empty delimiter token, is stored in
 * `$this->list`.
 */
public function lex()
{
    // TODO: Sometimes, static::parse* functions make unnecessary calls to
    // is* functions. For a better performance, some rules can be deduced
    // from context.
    // For example, in `parseBool` there is no need to compare the token
    // every time with `true` and `false`. The first step would be to
    // compare with 'true' only and just after that add another letter from
    // context and compare again with `false`.
    // Another example is `parseComment`.

    $list = new TokensList();

    /**
     * Last processed token.
     *
     * @var Token
     */
    $lastToken = null;

    for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
        /**
         * The new token.
         *
         * @var Token
         */
        $token = null;

        foreach (static::$PARSER_METHODS as $method) {
            if ($token = $this->$method()) {
                break;
            }
        }

        if ($token === null) {
            // @assert($this->last === $lastIdx);
            $token = new Token($this->str[$this->last]);
            $this->error(
                'Unexpected character.',
                $this->str[$this->last],
                $this->last
            );
        } elseif ($lastToken !== null
            && $token->type === Token::TYPE_SYMBOL
            && $token->flags & Token::FLAG_SYMBOL_VARIABLE
            && (
                $lastToken->type === Token::TYPE_STRING
                || (
                    $lastToken->type === Token::TYPE_SYMBOL
                    && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
                )
            )
        ) {
            // Handles ```... FROM 'user'@'%' ...```.
            // The new token is folded into the previous one, so nothing is
            // appended to the list and `$lastToken` stays as it is.
            $lastToken->token .= $token->token;
            $lastToken->type = Token::TYPE_SYMBOL;
            $lastToken->flags = Token::FLAG_SYMBOL_USER;
            $lastToken->value .= '@' . $token->value;
            continue;
        } elseif ($lastToken !== null
            && $token->type === Token::TYPE_KEYWORD
            && $lastToken->type === Token::TYPE_OPERATOR
            && $lastToken->value === '.'
        ) {
            // Handles ```... tbl.FROM ...```. In this case, FROM is not
            // a reserved word.
            $token->type = Token::TYPE_NONE;
            $token->flags = 0;
            $token->value = $token->token;
        }

        $token->position = $lastIdx;

        $list->tokens[$list->count++] = $token;

        // Handling delimiters.
        if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
            if ($this->last + 1 >= $this->len) {
                $this->error(
                    'Expected whitespace(s) before delimiter.',
                    '',
                    $this->last + 1
                );
                continue;
            }

            // Skipping last R (from `delimiteR`) and whitespaces between
            // the keyword `DELIMITER` and the actual delimiter.
            $pos = ++$this->last;
            if (($token = $this->parseWhitespace()) !== null) {
                $token->position = $pos;
                $list->tokens[$list->count++] = $token;
            }

            // Preparing the token that holds the new delimiter.
            if ($this->last + 1 >= $this->len) {
                $this->error(
                    'Expected delimiter.',
                    '',
                    $this->last + 1
                );
                continue;
            }
            $pos = $this->last + 1;

            // Parsing the delimiter: everything up to the next whitespace,
            // but never more than 15 characters.
            $this->delimiter = null;
            $delimiterLen = 0;
            while (++$this->last < $this->len && !Context::isWhitespace($this->str[$this->last]) && $delimiterLen < 15) {
                $this->delimiter .= $this->str[$this->last];
                ++$delimiterLen;
            }

            // The number of collected characters is checked instead of
            // `empty($this->delimiter)`, because `empty('0')` is true and
            // would wrongly reject the delimiter `0`.
            if ($delimiterLen === 0) {
                $this->error(
                    'Expected delimiter.',
                    '',
                    $this->last
                );
                $this->delimiter = ';';
            }

            // The loop above stopped one character past the delimiter.
            --$this->last;

            // Saving the delimiter and its token.
            $this->delimiterLen = strlen($this->delimiter);
            $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
            $token->position = $pos;
            $list->tokens[$list->count++] = $token;
        }

        $lastToken = $token;
    }

    // Adding a final delimiter to mark the ending.
    $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);

    // Saving the tokens list.
    $this->list = $list;
}
|
339 | |||
/**
 * Records a lexing error.
 *
 * The message is passed through `Translator::gettext`, wrapped together
 * with the remaining arguments in a `LexerException`, and handed over to
 * the parent implementation.
 *
 * @param string $msg  the error message
 * @param string $str  the character that produced the error
 * @param int    $pos  the position of the character
 * @param int    $code the code of the error
 *
 * @throws LexerException throws the exception, if strict mode is enabled
 */
public function error($msg, $str = '', $pos = 0, $code = 0)
{
    parent::error(
        new LexerException(Translator::gettext($msg), $str, $pos, $code)
    );
}
|
358 | |||
/**
 * Parses a keyword.
 *
 * Grows a candidate character by character and remembers the longest
 * candidate that both ends on a word boundary and is recognised by
 * `Context::isKeyword`. On return `$this->last` points at the final
 * character of that keyword, or is unchanged when none was found.
 *
 * @return null|Token
 */
public function parseKeyword()
{
    // Text collected so far.
    $candidate = '';

    // Longest keyword recognised so far, and the index where it ends.
    $match = null;
    $matchEnd = $this->last;

    // Whether the previously visited character was a whitespace.
    $previousWasSpace = false;

    for ($size = 1; $size < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$size, ++$this->last) {
        $char = $this->str[$this->last];
        $isSpace = (bool) Context::isWhitespace($char);

        // Composed keywords shouldn't have more than one whitespace
        // between their words: repeated whitespaces are skipped and do
        // not count towards the size of the keyword.
        if ($isSpace && $previousWasSpace) {
            --$size;
            continue;
        }
        $previousWasSpace = $isSpace;

        $candidate .= $char;

        // Only a candidate followed by the end of input or by a separator
        // can be a keyword.
        $atBoundary = $this->last + 1 === $this->len
            || Context::isSeparator($this->str[$this->last + 1]);
        if (!$atBoundary) {
            continue;
        }

        $flags = Context::isKeyword($candidate);
        if ($flags) {
            // No `break` here, so that the longest keyword is found.
            // For example, `OR` and `ORDER` have a common prefix `OR`.
            // Stopping at `OR` would make the parsing invalid.
            $match = new Token($candidate, Token::TYPE_KEYWORD, $flags);
            $matchEnd = $this->last;
        }
    }

    $this->last = $matchEnd;

    return $match;
}
||
419 | |||
/**
 * Parses a label.
 *
 * Collects characters until a `:` is met (label found), or until a
 * non-whitespace separator, the length limit or the end of the input is
 * reached (no label). On failure `$this->last` is left unchanged.
 *
 * @return null|Token
 */
public function parseLabel()
{
    // Text collected so far.
    $label = '';

    // Token to be returned, and the index where it ends.
    $result = null;
    $endIdx = $this->last;

    for ($size = 1; $size < Context::LABEL_MAX_LENGTH && $this->last < $this->len; ++$size, ++$this->last) {
        $char = $this->str[$this->last];
        $notFirst = $size > 1;

        if ($char === ':' && $notFirst) {
            // End of label; the colon is part of the token.
            $result = new Token($label . $char, Token::TYPE_LABEL);
            $endIdx = $this->last;
            break;
        }

        if (Context::isWhitespace($char) && $notFirst) {
            // Whitespace between the label and `:`. It is kept in the
            // token, but does not count towards its size.
            --$size;
        } elseif (Context::isSeparator($char)) {
            // Any other separator
            break;
        }

        $label .= $char;
    }

    $this->last = $endIdx;

    return $result;
}
||
464 | |||
/**
 * Parses an operator.
 *
 * Tries increasingly long candidates starting at `$this->last` and keeps
 * the longest one accepted by `Context::isOperator`. On return
 * `$this->last` points at the final character of that operator, or is
 * unchanged when none was found.
 *
 * @return null|Token
 */
public function parseOperator()
{
    // Text collected so far.
    $candidate = '';

    // Longest operator recognised so far, and the index where it ends.
    $match = null;
    $matchEnd = $this->last;

    for ($size = 1; $size < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$size, ++$this->last) {
        $candidate .= $this->str[$this->last];

        $flags = Context::isOperator($candidate);
        if (!$flags) {
            continue;
        }

        $match = new Token($candidate, Token::TYPE_OPERATOR, $flags);
        $matchEnd = $this->last;
    }

    $this->last = $matchEnd;

    return $match;
}
||
500 | |||
/**
 * Parses a whitespace.
 *
 * Consumes the whole run of consecutive whitespace characters starting at
 * `$this->last` and leaves `$this->last` on the final character of the run.
 *
 * @return null|Token
 */
public function parseWhitespace()
{
    $run = $this->str[$this->last];

    if (!Context::isWhitespace($run)) {
        return null;
    }

    for (++$this->last; $this->last < $this->len; ++$this->last) {
        $char = $this->str[$this->last];
        if (!Context::isWhitespace($char)) {
            break;
        }
        $run .= $char;
    }

    // The loop stopped one position past the run.
    --$this->last;

    return new Token($run, Token::TYPE_WHITESPACE);
}
||
522 | |||
/**
 * Parses a comment.
 *
 * Three forms are tried in turn, each on a longer prefix of the input:
 * a one-character prefix (Bash style, `#`), a two-character prefix
 * (C style) and a three-character prefix (SQL style, `--` followed by one
 * more character). `Context::isComment` decides whether a prefix opens a
 * comment.
 *
 * When no form matches, `$this->last` is restored to its initial value.
 *
 * @return null|Token
 */
public function parseComment()
{
    // Position to restore if nothing is recognised.
    $iBak = $this->last;
    $token = $this->str[$this->last];

    // Bash style comments. (#comment\n)
    if (Context::isComment($token)) {
        while (
            ++$this->last < $this->len
            && $this->str[$this->last] !== "\n"
        ) {
            $token .= $this->str[$this->last];
        }
        // The trailing \n is not part of the comment: stepping back lets
        // the main loop lex it as a whitespace token.
        if ($this->last < $this->len) {
            --$this->last;
        }

        return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
    }

    // C style comments. (/*comment*\/)
    if (++$this->last < $this->len) {
        $token .= $this->str[$this->last];
        if (Context::isComment($token)) {
            $flags = Token::FLAG_COMMENT_C;

            // This comment already ended. It may be a part of a
            // previous MySQL specific command.
            if ($token === '*/') {
                return new Token($token, Token::TYPE_COMMENT, $flags);
            }

            // Checking if this is a MySQL-specific command.
            if ($this->last + 1 < $this->len
                && $this->str[$this->last + 1] === '!'
            ) {
                $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
                $token .= $this->str[++$this->last];

                // Digits directly following the `!` belong to the token.
                while (
                    ++$this->last < $this->len
                    && '0' <= $this->str[$this->last]
                    && $this->str[$this->last] <= '9'
                ) {
                    $token .= $this->str[$this->last];
                }
                // The loop stopped one position past the last digit.
                --$this->last;

                // We split this comment and parse only its beginning
                // here.
                return new Token($token, Token::TYPE_COMMENT, $flags);
            }

            // Parsing the comment: stop on the `/` that directly follows
            // a `*`, or at the end of the input.
            while (
                ++$this->last < $this->len
                && (
                    $this->str[$this->last - 1] !== '*'
                    || $this->str[$this->last] !== '/'
                )
            ) {
                $token .= $this->str[$this->last];
            }

            // Adding the ending (absent if the input ended first).
            if ($this->last < $this->len) {
                $token .= $this->str[$this->last];
            }

            return new Token($token, Token::TYPE_COMMENT, $flags);
        }
    }

    // SQL style comments. (-- comment\n)
    if (++$this->last < $this->len) {
        $token .= $this->str[$this->last];
        $end = false;
    } else {
        // No further character is available; `$end` tells `isComment`
        // that the input stops here.
        --$this->last;
        $end = true;
    }
    if (Context::isComment($token, $end)) {
        // Checking if this comment did not end already (```--\n```).
        if ($this->str[$this->last] !== "\n") {
            while (
                ++$this->last < $this->len
                && $this->str[$this->last] !== "\n"
            ) {
                $token .= $this->str[$this->last];
            }
        }
        // Stepping back lets the main loop lex the trailing \n as a
        // whitespace token.
        if ($this->last < $this->len) {
            --$this->last;
        }

        return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
    }

    $this->last = $iBak;

    return null;
}
||
632 | |||
/**
 * Parses a boolean.
 *
 * Reads four characters (`TRUE`) and, if needed, a fifth one (`FALSE`) and
 * asks `Context::isBool` whether they form a boolean literal. The literal
 * is accepted only when it is followed by the end of the input or by a
 * separator, so that a longer word which merely starts with a boolean
 * literal is not split in two.
 *
 * On failure `$this->last` is restored to its initial value.
 *
 * @return null|Token
 */
public function parseBool()
{
    if ($this->last + 3 >= $this->len) {
        // At least `min(strlen('TRUE'), strlen('FALSE'))` characters are
        // required.
        return null;
    }

    $iBak = $this->last;
    $token = $this->str[$this->last] . $this->str[++$this->last]
        . $this->str[++$this->last] . $this->str[++$this->last]; // _TRUE_ or _FALS_e

    if (Context::isBool($token)
        && ($this->last + 1 === $this->len || Context::isSeparator($this->str[$this->last + 1]))
    ) {
        return new Token($token, Token::TYPE_BOOL);
    } elseif (++$this->last < $this->len) {
        $token .= $this->str[$this->last]; // fals_E_
        if (Context::isBool($token)
            && ($this->last + 1 === $this->len || Context::isSeparator($this->str[$this->last + 1]))
        ) {
            return new Token($token, Token::TYPE_BOOL, 1);
        }
    }

    $this->last = $iBak;

    return null;
}
||
663 | |||
/**
 * Parses a number.
 *
 * Runs a small state machine over the input starting at `$this->last`.
 * When the machine stops in an accepting state, the collected text is
 * returned as a number token and `$this->last` is left on its final
 * character; otherwise `$this->last` is restored and `null` is returned.
 *
 * @return null|Token
 */
public function parseNumber()
{
    // A rudimentary state machine is being used to parse numbers due to
    // the various forms of their notation.
    //
    // Below are the states of the machines and the conditions to change
    // the state.
    //
    //      1 --------------------[ + or - ]-------------------> 1
    //      1 -------------------[ 0x or 0X ]------------------> 2
    //      1 --------------------[ 0 to 9 ]-------------------> 3
    //      1 -----------------------[ . ]---------------------> 4
    //      1 -----------------------[ b ]---------------------> 7
    //
    //      2 --------------------[ 0 to F ]-------------------> 2
    //
    //      3 --------------------[ 0 to 9 ]-------------------> 3
    //      3 -----------------------[ . ]---------------------> 4
    //      3 --------------------[ e or E ]-------------------> 5
    //
    //      4 --------------------[ 0 to 9 ]-------------------> 4
    //      4 --------------------[ e or E ]-------------------> 5
    //
    //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
    //
    //      6 --------------------[ 0 to 9 ]-------------------> 6
    //
    //      7 -----------------------[ ' ]---------------------> 8
    //
    //      8 --------------------[ 0 or 1 ]-------------------> 8
    //      8 -----------------------[ ' ]---------------------> 9
    //
    // State 1 may be reached by negative numbers.
    // State 2 is reached only by hex numbers.
    // State 4 is reached only by float numbers.
    // State 5 is reached only by numbers in approximate form.
    // State 7 is reached only by numbers in bit representation.
    //
    // Valid final states are: 2, 3, 4 (unless the token is a lone `.`),
    // 6 and 9. Any parsing that finished in a state other than these is
    // invalid.
    $iBak = $this->last;
    $token = '';
    $flags = 0;
    $state = 1;
    for (; $this->last < $this->len; ++$this->last) {
        if ($state === 1) {
            if ($this->str[$this->last] === '-') {
                $flags |= Token::FLAG_NUMBER_NEGATIVE;
            } elseif ($this->last + 1 < $this->len
                && $this->str[$this->last] === '0'
                && (
                    $this->str[$this->last + 1] === 'x'
                    || $this->str[$this->last + 1] === 'X'
                )
            ) {
                // The `0` is consumed here; the `x` that follows is
                // appended by the common code at the end of the loop.
                $token .= $this->str[$this->last++];
                $state = 2;
            } elseif ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9') {
                $state = 3;
            } elseif ($this->str[$this->last] === '.') {
                $state = 4;
            } elseif ($this->str[$this->last] === 'b') {
                $state = 7;
            } elseif ($this->str[$this->last] !== '+') {
                // `+` is a valid character in a number.
                break;
            }
        } elseif ($state === 2) {
            $flags |= Token::FLAG_NUMBER_HEX;
            if (
                !(
                    ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'F')
                    || ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'f')
                )
            ) {
                break;
            }
        } elseif ($state === 3) {
            if ($this->str[$this->last] === '.') {
                $state = 4;
            } elseif ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
                $state = 5;
            } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                // Just digits and `.`, `e` and `E` are valid characters.
                break;
            }
        } elseif ($state === 4) {
            $flags |= Token::FLAG_NUMBER_FLOAT;
            if ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
                $state = 5;
            } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                // Just digits, `e` and `E` are valid characters.
                break;
            }
        } elseif ($state === 5) {
            $flags |= Token::FLAG_NUMBER_APPROXIMATE;
            if ($this->str[$this->last] === '+' || $this->str[$this->last] === '-'
                || ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
            ) {
                $state = 6;
            } else {
                break;
            }
        } elseif ($state === 6) {
            if ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                // Just digits are valid characters.
                break;
            }
        } elseif ($state === 7) {
            $flags |= Token::FLAG_NUMBER_BINARY;
            if ($this->str[$this->last] === '\'') {
                $state = 8;
            } else {
                break;
            }
        } elseif ($state === 8) {
            if ($this->str[$this->last] === '\'') {
                $state = 9;
            } elseif ($this->str[$this->last] !== '0'
                && $this->str[$this->last] !== '1'
            ) {
                break;
            }
        } elseif ($state === 9) {
            // The closing quote was already consumed; nothing may follow.
            break;
        }
        // Reached only when the current character was accepted.
        $token .= $this->str[$this->last];
    }
    if ($state === 2 || $state === 3
        || ($token !== '.' && $state === 4)
        || $state === 6 || $state === 9
    ) {
        // The loop stopped one position past the number.
        --$this->last;

        return new Token($token, Token::TYPE_NUMBER, $flags);
    }
    $this->last = $iBak;

    return null;
}
||
808 | |||
/**
 * Parses a string.
 *
 * The character at `$this->last` must either be recognised by
 * `Context::isString` or be equal to `$quote`; it then serves as the
 * quote that delimits the string. A doubled quote and, except inside
 * backticks, a backslash followed by any character are kept as part of
 * the string. A missing ending quote is reported through `error()`, and
 * the text read so far is still returned as a token.
 *
 * @param string $quote additional starting symbol
 *
 * @return null|Token
 */
public function parseString($quote = '')
{
    $opening = $this->str[$this->last];
    $flags = Context::isString($opening);
    if (!$flags && $opening !== $quote) {
        return null;
    }

    $text = $opening;

    // Backslash escapes are not recognised inside backticks.
    $escapesAllowed = $opening !== '`';

    while (++$this->last < $this->len) {
        $char = $this->str[$this->last];
        $hasNext = $this->last + 1 < $this->len;

        $doubledQuote = $hasNext
            && $char === $opening
            && $this->str[$this->last + 1] === $opening;
        $backslashEscape = $hasNext && $char === '\\' && $escapesAllowed;

        if ($doubledQuote || $backslashEscape) {
            // Both characters of the pair belong to the string.
            $text .= $char . $this->str[++$this->last];
            continue;
        }

        if ($char === $opening) {
            break;
        }

        $text .= $char;
    }

    $closed = $this->last < $this->len && $this->str[$this->last] === $opening;
    if ($closed) {
        $text .= $this->str[$this->last];
    } else {
        $this->error(
            sprintf(
                Translator::gettext('Ending quote %1$s was expected.'),
                $opening
            ),
            '',
            $this->last
        );
    }

    return new Token($text, Token::TYPE_STRING, $flags);
}
||
855 | |||
/**
 * Parses a symbol.
 *
 * The character at `$this->last` must be recognised by
 * `Context::isSymbol`. The name that follows is read with `parseString`
 * (using the backtick as an additional quote) or, failing that, with
 * `parseUnknown`; an error is reported when neither yields a token.
 *
 * @return null|Token
 */
public function parseSymbol()
{
    $token = $this->str[$this->last];
    if (!($flags = Context::isSymbol($token))) {
        return null;
    }

    if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
        // The cursor moves past the variable sign either way; a second
        // `@` marks a system variable.
        if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
            // This is a system variable (e.g. `@@hostname`).
            $token .= $this->str[$this->last];
            ++$this->last;
            $flags |= Token::FLAG_SYMBOL_SYSTEM;
        }
    } elseif ($flags & Token::FLAG_SYMBOL_PARAMETER) {
        // The parameter sign is kept in `$token`; the name starts on the
        // next character, if there is one.
        if ($this->last + 1 < $this->len) {
            ++$this->last;
        }
    } else {
        // The cursor did not move, so the current character is read again
        // below; it must not appear twice in the token.
        $token = '';
    }

    $str = null;

    if ($this->last < $this->len) {
        if (($str = $this->parseString('`')) === null) {
            // `parseUnknown` is an instance method, so it is called on
            // `$this` rather than through `static::`.
            if (($str = $this->parseUnknown()) === null) {
                $this->error(
                    'Variable name was expected.',
                    $this->str[$this->last],
                    $this->last
                );
            }
        }
    }

    if ($str !== null) {
        $token .= $str->token;
    }

    return new Token($token, Token::TYPE_SYMBOL, $flags);
}
||
902 | |||
903 | /** |
||
904 | * Parses unknown parts of the query. |
||
905 | * |
||
906 | * @return null|Token |
||
907 | */ |
||
908 | 269 | View Code Duplication | public function parseUnknown() |
922 | |||
923 | /** |
||
924 | * Parses the delimiter of the query. |
||
925 | * |
||
926 | * @return null|Token |
||
927 | */ |
||
928 | 373 | public function parseDelimiter() |
|
943 | } |
||
944 |
This check looks for `@param` annotations where the type inferred by our type inference engine differs from the declared type. It makes a suggestion as to what type it considers more descriptive.
Most often this is a case of a parameter that can be null in addition to its declared types.