Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Lexer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Lexer, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
42 | class Lexer extends Core |
||
43 | { |
||
44 | /** |
||
45 | * A list of methods that are used in lexing the SQL query. |
||
46 | * |
||
47 | * @var array |
||
48 | */ |
||
49 | public static $PARSER_METHODS = array( |
||
50 | // It is best to put the parsers in order of their complexity |
||
51 | // (ascending) and their occurrence rate (descending). |
||
52 | // |
||
53 | // Conflicts: |
||
54 | // |
||
55 | // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber` |
||
56 | // They fight over delimiter. The delimiter may be a keyword, a |
||
57 | // number or almost any character which makes the delimiter one of |
||
58 | // the first tokens that must be parsed. |
||
59 | // |
||
60 | // 2. `parseNumber` and `parseOperator` |
||
61 | // They fight over `+` and `-`. |
||
62 | // |
||
63 | // 3. `parseComment` and `parseOperator` |
||
64 | // They fight over `/` (as in ```/*comment*/``` or ```a / b```) |
||
65 | // |
||
66 | // 4. `parseBool` and `parseKeyword` |
||
67 | // They fight over `TRUE` and `FALSE`. |
||
68 | // |
||
69 | // 5. `parseKeyword` and `parseUnknown` |
||
70 | // They fight over words. `parseUnknown` does not know about |
||
71 | // keywords. |
||
72 | |||
73 | 'parseDelimiter', 'parseWhitespace', 'parseNumber', |
||
74 | 'parseComment', 'parseOperator', 'parseBool', 'parseString', |
||
75 | 'parseSymbol', 'parseKeyword', 'parseLabel', 'parseUnknown', |
||
76 | ); |
||
77 | |||
78 | /** |
||
79 | * The string to be parsed. |
||
80 | * |
||
81 | * @var string|UtfString |
||
82 | */ |
||
83 | public $str = ''; |
||
84 | |||
85 | /** |
||
86 | * The length of `$str`. |
||
87 | * |
||
88 | * By storing its length, a lot of time is saved, because parsing methods |
||
89 | * would call `strlen` every time. |
||
90 | * |
||
91 | * @var int |
||
92 | */ |
||
93 | public $len = 0; |
||
94 | |||
95 | /** |
||
96 | * The index of the last parsed character. |
||
97 | * |
||
98 | * @var int |
||
99 | */ |
||
100 | public $last = 0; |
||
101 | |||
102 | /** |
||
103 | * Tokens extracted from given strings. |
||
104 | * |
||
105 | * @var TokensList |
||
106 | */ |
||
107 | public $list; |
||
108 | |||
109 | /** |
||
110 | * The default delimiter. This is used, by default, in all new instances. |
||
111 | * |
||
112 | * @var string |
||
113 | */ |
||
114 | public static $DEFAULT_DELIMITER = ';'; |
||
115 | |||
116 | /** |
||
117 | * Statements delimiter. |
||
118 | * This may change during lexing. |
||
119 | * |
||
120 | * @var string |
||
121 | */ |
||
122 | public $delimiter; |
||
123 | |||
124 | /** |
||
125 | * The length of the delimiter. |
||
126 | * |
||
127 | * Because `parseDelimiter` can be called a lot, it would perform a lot of |
||
128 | * calls to `strlen`, which might affect performance when the delimiter is |
||
129 | * big. |
||
130 | * |
||
131 | * @var int |
||
132 | */ |
||
133 | public $delimiterLen; |
||
134 | |||
135 | /** |
||
136 | * Gets the tokens list parsed by a new instance of a lexer. |
||
137 | * |
||
138 | * @param string|UtfString $str the query to be lexed |
||
139 | * @param bool $strict whether strict mode should be |
||
140 | * enabled or not |
||
141 | * @param string $delimiter the delimiter to be used |
||
|
|||
142 | * |
||
143 | * @return TokensList |
||
144 | */ |
||
145 | public static function getTokens($str, $strict = false, $delimiter = null) |
||
146 | { |
||
147 | $lexer = new self($str, $strict, $delimiter); |
||
148 | |||
149 | return $lexer->list; |
||
150 | } |
||
151 | |||
/**
 * Stores the query, configures the delimiter and immediately lexes the input.
 *
 * @param string|UtfString $str       the query to be lexed
 * @param bool             $strict    whether strict mode should be enabled
 * @param string           $delimiter the delimiter to be used; the default
 *                                    delimiter applies when it is empty
 */
public function __construct($str, $strict = false, $delimiter = null)
{
    if ($str instanceof UtfString) {
        $len = $str->length();
    } else {
        // `strlen` is used instead of `mb_strlen` because the lexer needs
        // to parse each byte of the input.
        $len = strlen($str);

        // For multi-byte strings, a new instance of `UtfString` is
        // initialized (only if `UtfString` usage is forced).
        if (USE_UTF_STRINGS && $len !== mb_strlen($str, 'UTF-8')) {
            $str = new UtfString($str);
            $len = $str->length();
        }
    }

    $this->str = $str;
    $this->len = $len;

    $this->strict = $strict;

    // Setting the delimiter.
    $this->setDelimiter(
        empty($delimiter) ? static::$DEFAULT_DELIMITER : $delimiter
    );

    $this->lex();
}
||
184 | |||
/**
 * Replaces the statement delimiter and refreshes its cached byte length.
 *
 * @param string $delimiter the new delimiter
 */
public function setDelimiter($delimiter)
{
    $this->delimiter = $delimiter;
    // Cached so the length does not have to be recomputed with `strlen`.
    $this->delimiterLen = strlen($delimiter);
}
|
195 | 223 | ||
/**
 * Parses the string and extracts lexemes.
 *
 * Walks `$this->str` from the first character, trying every method listed
 * in `static::$PARSER_METHODS` (in order) at each position until one of
 * them returns a token. The collected tokens, followed by a final
 * value-less delimiter token, are stored in `$this->list`.
 *
 * Special cases handled here rather than in the individual parsers:
 *  - a character no parser accepts becomes a plain token and is reported
 *    through `error()`;
 *  - a variable symbol directly after a string or a backtick-quoted symbol
 *    is merged into the previous token (```'user'@'%'```);
 *  - a keyword directly after the `.` operator is demoted to a plain token
 *    (```tbl.FROM```);
 *  - the `DELIMITER` command changes `$this->delimiter` and
 *    `$this->delimiterLen` for the rest of the input.
 */
public function lex()
{
    // TODO: Sometimes, static::parse* functions make unnecessary calls to
    // is* functions. For a better performance, some rules can be deduced
    // from context.
    // For example, in `parseBool` there is no need to compare the token
    // every time with `true` and `false`. The first step would be to
    // compare with 'true' only and just after that add another letter from
    // context and compare again with `false`.
    // Another example is `parseComment`.

    $list = new TokensList();

    /**
     * Last processed token.
     *
     * @var Token
     */
    $lastToken = null;

    for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
        /**
         * The new token.
         *
         * @var Token
         */
        $token = null;

        foreach (static::$PARSER_METHODS as $method) {
            if ($token = $this->$method()) {
                break;
            }
        }

        if ($token === null) {
            // @assert($this->last === $lastIdx);
            $token = new Token($this->str[$this->last]);
            $this->error(
                'Unexpected character.',
                $this->str[$this->last],
                $this->last
            );
        } elseif ($lastToken !== null
            && $token->type === Token::TYPE_SYMBOL
            && $token->flags & Token::FLAG_SYMBOL_VARIABLE
            && (
                $lastToken->type === Token::TYPE_STRING
                || (
                    $lastToken->type === Token::TYPE_SYMBOL
                    && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
                )
            )
        ) {
            // Handles ```... FROM 'user'@'%' ...```.
            $lastToken->token .= $token->token;
            $lastToken->type = Token::TYPE_SYMBOL;
            $lastToken->flags = Token::FLAG_SYMBOL_USER;
            $lastToken->value .= '@' . $token->value;
            continue;
        } elseif ($lastToken !== null
            && $token->type === Token::TYPE_KEYWORD
            && $lastToken->type === Token::TYPE_OPERATOR
            && $lastToken->value === '.'
        ) {
            // Handles ```... tbl.FROM ...```. In this case, FROM is not
            // a reserved word.
            $token->type = Token::TYPE_NONE;
            $token->flags = 0;
            $token->value = $token->token;
        }

        $token->position = $lastIdx;

        $list->tokens[$list->count++] = $token;

        // Handling delimiters.
        if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
            if ($this->last + 1 >= $this->len) {
                $this->error(
                    'Expected whitespace(s) before delimiter.',
                    '',
                    $this->last + 1
                );
                continue;
            }

            // Skipping last R (from `delimiteR`) and whitespaces between
            // the keyword `DELIMITER` and the actual delimiter.
            $pos = ++$this->last;
            if (($token = $this->parseWhitespace()) !== null) {
                $token->position = $pos;
                $list->tokens[$list->count++] = $token;
            }

            // Preparing the token that holds the new delimiter.
            if ($this->last + 1 >= $this->len) {
                $this->error(
                    'Expected delimiter.',
                    '',
                    $this->last + 1
                );
                continue;
            }
            $pos = $this->last + 1;

            // Parsing the delimiter: everything up to the next whitespace.
            $this->delimiter = '';
            while (++$this->last < $this->len && !Context::isWhitespace($this->str[$this->last])) {
                $this->delimiter .= $this->str[$this->last];
            }

            // An explicit comparison is used instead of `empty()`, which
            // would also reject the perfectly valid delimiter `0`.
            if ($this->delimiter === '') {
                $this->error(
                    'Expected delimiter.',
                    '',
                    $this->last
                );
                $this->delimiter = ';';
            }

            // The loop above stopped one position past the delimiter.
            --$this->last;

            // Saving the delimiter and its token.
            $this->delimiterLen = strlen($this->delimiter);
            $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
            $token->position = $pos;
            $list->tokens[$list->count++] = $token;
        }

        $lastToken = $token;
    }

    // Adding a final delimiter to mark the ending.
    $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);

    // Saving the tokens list.
    $this->list = $list;
}
|
337 | 1 | ||
/**
 * Creates a new error log.
 *
 * The message is passed through `Translator::gettext`, wrapped in a
 * `LexerException` and handed to `parent::error()`.
 *
 * @param string $msg  the error message
 * @param string $str  the character that produced the error
 * @param int    $pos  the position of the character
 * @param int    $code the code of the error
 *
 * @throws LexerException throws the exception, if strict mode is enabled
 *                        (decided by the parent implementation, which is
 *                        not part of this listing)
 */
public function error($msg, $str = '', $pos = 0, $code = 0)
{
    parent::error(
        new LexerException(Translator::gettext($msg), $str, $pos, $code)
    );
}
|
356 | |||
/**
 * Parses a keyword.
 *
 * Starting at `$this->last`, grows a candidate character by character and
 * remembers the longest candidate that `Context::isKeyword` accepts and
 * that is followed by a separator or the end of the input. Consecutive
 * whitespace characters are collapsed: only the first one of a run is
 * added to the candidate.
 *
 * On success `$this->last` points at the last character of the keyword;
 * otherwise it is left where it was.
 *
 * @return Token the keyword token, or null when no keyword starts here
 */
public function parseKeyword()
{
    // Text accumulated so far.
    $candidate = '';

    // Longest keyword recognised so far and the index where it ends.
    $match = null;
    $matchEnd = $this->last;

    // Whether the previously inspected character was a whitespace.
    $afterSpace = false;

    // Number of characters counted towards the length limit (1-based, so
    // at most `KEYWORD_MAX_LENGTH - 1` characters are accumulated).
    $counted = 1;

    while ($counted < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len) {
        $char = $this->str[$this->last];
        $isSpace = Context::isWhitespace($char);

        // Composed keywords shouldn't have more than one whitespace between
        // keywords, so repeated whitespace is skipped and not counted.
        if (!($isSpace && $afterSpace)) {
            $candidate .= $char;

            $atBoundary = $this->last + 1 === $this->len
                || Context::isSeparator($this->str[$this->last + 1]);

            if ($atBoundary && ($flags = Context::isKeyword($candidate))) {
                // Not stopping here, so the longest keyword wins. For
                // example, `OR` and `ORDER` share the prefix `OR`.
                $match = new Token($candidate, Token::TYPE_KEYWORD, $flags);
                $matchEnd = $this->last;
            }

            ++$counted;
        }

        $afterSpace = $isSpace;
        ++$this->last;
    }

    $this->last = $matchEnd;

    return $match;
}
||
418 | 210 | ||
/**
 * Parses a label.
 *
 * Accumulates characters from `$this->last` until a `:` is met, in which
 * case everything read so far plus the colon becomes the label. Runs of
 * whitespace are collapsed to their first character. The scan gives up
 * once the length limit `Context::LABEL_MAX_LENGTH` or the end of the
 * input is reached.
 *
 * On success `$this->last` points at the colon; otherwise it is left
 * where it was.
 *
 * @return Token the label token, or null when no label starts here
 */
public function parseLabel()
{
    // Text accumulated so far.
    $text = '';

    // The label, once found, and the index where it ends.
    $label = null;
    $labelEnd = $this->last;

    // Whether the previously inspected character was a whitespace.
    $afterSpace = false;

    // Number of characters counted towards the length limit (1-based).
    $counted = 1;

    while ($counted < Context::LABEL_MAX_LENGTH && $this->last < $this->len) {
        $char = $this->str[$this->last];
        $isSpace = Context::isWhitespace($char);

        if (!$isSpace && $char === ':') {
            // The colon terminates (and belongs to) the label.
            $label = new Token($text . $char, Token::TYPE_LABEL);
            $labelEnd = $this->last;
            break;
        }

        // Repeated whitespace is neither stored nor counted.
        if (!($isSpace && $afterSpace)) {
            $text .= $char;
            ++$counted;
        }

        $afterSpace = $isSpace;
        ++$this->last;
    }

    $this->last = $labelEnd;

    return $label;
}
|
474 | 218 | ||
/**
 * Parses an operator.
 *
 * Grows a candidate from `$this->last` and keeps the longest one accepted
 * by `Context::isOperator`, reading fewer than
 * `Context::OPERATOR_MAX_LENGTH` characters.
 *
 * On success `$this->last` points at the last character of the operator;
 * otherwise it is left where it was.
 *
 * @return Token the operator token, or null when no operator starts here
 */
public function parseOperator()
{
    // Text accumulated so far.
    $candidate = '';

    // Longest operator recognised so far and the index where it ends.
    $match = null;
    $matchEnd = $this->last;

    // 1-based counter compared against the length limit.
    $counted = 1;

    while ($counted < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len) {
        $candidate .= $this->str[$this->last];

        $flags = Context::isOperator($candidate);
        if ($flags) {
            $match = new Token($candidate, Token::TYPE_OPERATOR, $flags);
            $matchEnd = $this->last;
        }

        ++$counted;
        ++$this->last;
    }

    $this->last = $matchEnd;

    return $match;
}
|
510 | |||
/**
 * Parses a whitespace.
 *
 * Consumes the whole run of consecutive whitespace characters starting at
 * `$this->last` and leaves `$this->last` on the last character of the run.
 *
 * @return Token the whitespace token, or null when the current character
 *               is not a whitespace
 */
public function parseWhitespace()
{
    $run = $this->str[$this->last];

    if (!Context::isWhitespace($run)) {
        return null;
    }

    // Look ahead until the first non-whitespace character (or the end).
    for ($next = $this->last + 1; $next < $this->len; ++$next) {
        $char = $this->str[$next];
        if (!Context::isWhitespace($char)) {
            break;
        }
        $run .= $char;
    }

    // `$next` is one past the run; step back onto its last character.
    $this->last = $next - 1;

    return new Token($run, Token::TYPE_WHITESPACE);
}
||
532 | |||
/**
 * Parses a comment.
 *
 * Three notations are recognised, tried in this order:
 *  - Bash style (```#comment\n```), runs to the end of the line;
 *  - C style (```/*comment*\/```), including a stray ```*\/``` terminator
 *    and the opening of a MySQL-specific command (```/*!12345```), of
 *    which only the beginning is returned;
 *  - SQL style (```-- comment\n```), runs to the end of the line.
 *
 * The line ending is part of a line comment only when it is present in
 * the input; a comment that ends with the input gets no newline appended.
 *
 * `$this->last` is restored when nothing is recognised.
 *
 * @return Token the comment token, or null when no comment starts here
 */
public function parseComment()
{
    $iBak = $this->last;
    $token = $this->str[$this->last];

    // Bash style comments. (#comment\n)
    if (Context::isComment($token)) {
        $token = $this->readCommentLine($token);

        return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
    }

    // C style comments. (/*comment*\/)
    if (++$this->last < $this->len) {
        $token .= $this->str[$this->last];
        if (Context::isComment($token)) {
            $flags = Token::FLAG_COMMENT_C;

            // This comment already ended. It may be a part of a
            // previous MySQL specific command.
            if ($token === '*/') {
                return new Token($token, Token::TYPE_COMMENT, $flags);
            }

            // Checking if this is a MySQL-specific command.
            if ($this->last + 1 < $this->len
                && $this->str[$this->last + 1] === '!'
            ) {
                $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
                $token .= $this->str[++$this->last];

                // Optional digits following the `!`.
                while (
                    ++$this->last < $this->len
                    && '0' <= $this->str[$this->last]
                    && $this->str[$this->last] <= '9'
                ) {
                    $token .= $this->str[$this->last];
                }
                --$this->last;

                // We split this comment and parse only its beginning
                // here.
                return new Token($token, Token::TYPE_COMMENT, $flags);
            }

            // Parsing the comment.
            while (
                ++$this->last < $this->len
                && (
                    $this->str[$this->last - 1] !== '*'
                    || $this->str[$this->last] !== '/'
                )
            ) {
                $token .= $this->str[$this->last];
            }

            // Adding the ending (absent when the comment is unterminated).
            if ($this->last < $this->len) {
                $token .= $this->str[$this->last];
            }

            return new Token($token, Token::TYPE_COMMENT, $flags);
        }
    }

    // SQL style comments. (-- comment\n)
    if (++$this->last < $this->len) {
        $token .= $this->str[$this->last];
        if (Context::isComment($token)) {
            // Checking if this comment did not end already (```--\n```).
            if ($this->str[$this->last] !== "\n") {
                $token = $this->readCommentLine($token);
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
        }
    }

    $this->last = $iBak;

    return null;
}

/**
 * Appends the rest of the current line to an already started line comment.
 *
 * Advances `$this->last` to the terminating newline, or to `$this->len`
 * when the input ends first. The newline is appended only when it was
 * actually found, so the token never contains characters that are missing
 * from the input.
 *
 * @param string $token the comment text read so far
 *
 * @return string the comment text including the rest of the line
 */
private function readCommentLine($token)
{
    while (
        ++$this->last < $this->len
        && $this->str[$this->last] !== "\n"
    ) {
        $token .= $this->str[$this->last];
    }

    if ($this->last < $this->len) {
        $token .= "\n"; // Adding the line ending.
    }

    return $token;
}
||
631 | |||
/**
 * Parses a boolean.
 *
 * Tests the next four characters and, failing that, the next five,
 * against `Context::isBool`. A five-character match is flagged with `1`.
 *
 * On success `$this->last` points at the last character of the boolean;
 * otherwise it is left where it was.
 *
 * @return Token the boolean token, or null when no boolean starts here
 */
public function parseBool()
{
    $start = $this->last;

    if ($start + 3 >= $this->len) {
        // At least `min(strlen('TRUE'), strlen('FALSE'))` characters are
        // required.
        return null;
    }

    // _TRUE_ or _FALS_e
    $word = '';
    for ($offset = 0; $offset < 4; ++$offset) {
        $word .= $this->str[$start + $offset];
    }
    $this->last = $start + 3;

    if (Context::isBool($word)) {
        return new Token($word, Token::TYPE_BOOL);
    }

    $this->last = $start + 4;
    if ($this->last < $this->len) {
        $word .= $this->str[$this->last]; // fals_E_
        if (Context::isBool($word)) {
            return new Token($word, Token::TYPE_BOOL, 1);
        }
    }

    $this->last = $start;

    return null;
}
|
662 | 218 | ||
/**
 * Parses a number.
 *
 * Recognises signed integers, hexadecimal literals (`0x..`), floats,
 * numbers in approximate form (`1e5`) and bit literals (`b'0101'`).
 *
 * On success `$this->last` points at the last character of the number;
 * otherwise it is left where it was.
 *
 * @return Token the number token, or null when no number starts here
 */
public function parseNumber()
{
    // A rudimentary state machine is being used to parse numbers due to
    // the various forms of their notation.
    //
    // Below are the states of the machines and the conditions to change
    // the state.
    //
    //      1 --------------------[ + or - ]-------------------> 1
    //      1 -------------------[ 0x or 0X ]------------------> 2
    //      1 --------------------[ 0 to 9 ]-------------------> 3
    //      1 -----------------------[ . ]---------------------> 4
    //      1 -----------------------[ b ]---------------------> 7
    //
    //      2 --------------------[ 0 to F ]-------------------> 2
    //
    //      3 --------------------[ 0 to 9 ]-------------------> 3
    //      3 -----------------------[ . ]---------------------> 4
    //      3 --------------------[ e or E ]-------------------> 5
    //
    //      4 --------------------[ 0 to 9 ]-------------------> 4
    //      4 --------------------[ e or E ]-------------------> 5
    //
    //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
    //
    //      6 --------------------[ 0 to 9 ]-------------------> 6
    //
    //      7 -----------------------[ ' ]---------------------> 8
    //
    //      8 --------------------[ 0 or 1 ]-------------------> 8
    //      8 -----------------------[ ' ]---------------------> 9
    //
    // State 1 may be reached by negative numbers.
    // State 2 is reached only by hex numbers.
    // State 4 is reached only by float numbers.
    // State 5 is reached only by numbers in approximate form.
    // State 7 is reached only by numbers in bit representation.
    //
    // Valid final states are: 2, 3, 4 (unless the token is a lone `.`),
    // 6 and 9. Any parsing that finished in a state other than these is
    // invalid.
    $start = $this->last;
    $text = '';
    $flags = 0;
    $state = 1;

    for (; $this->last < $this->len; ++$this->last) {
        $char = $this->str[$this->last];

        switch ($state) {
            case 1:
                if ($char === '-') {
                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
                } elseif ($this->last + 1 < $this->len
                    && $char === '0'
                    && (
                        $this->str[$this->last + 1] === 'x'
                        || $this->str[$this->last + 1] === 'X'
                    )
                ) {
                    // Store the `0` now and move on to the `x`, which is
                    // appended at the bottom of the loop.
                    $text .= $char;
                    ++$this->last;
                    $char = $this->str[$this->last];
                    $state = 2;
                } elseif ($char >= '0' && $char <= '9') {
                    $state = 3;
                } elseif ($char === '.') {
                    $state = 4;
                } elseif ($char === 'b') {
                    $state = 7;
                } elseif ($char !== '+') {
                    // `+` is a valid character in a number.
                    break 2;
                }
                break;

            case 2:
                $flags |= Token::FLAG_NUMBER_HEX;
                if (
                    !(
                        ($char >= '0' && $char <= '9')
                        || ($char >= 'A' && $char <= 'F')
                        || ($char >= 'a' && $char <= 'f')
                    )
                ) {
                    break 2;
                }
                break;

            case 3:
                if ($char === '.') {
                    $state = 4;
                } elseif ($char === 'e' || $char === 'E') {
                    $state = 5;
                } elseif ($char < '0' || $char > '9') {
                    // Just digits and `.`, `e` and `E` are valid characters.
                    break 2;
                }
                break;

            case 4:
                $flags |= Token::FLAG_NUMBER_FLOAT;
                if ($char === 'e' || $char === 'E') {
                    $state = 5;
                } elseif ($char < '0' || $char > '9') {
                    // Just digits, `e` and `E` are valid characters.
                    break 2;
                }
                break;

            case 5:
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                if ($char === '+' || $char === '-'
                    || ($char >= '0' && $char <= '9')
                ) {
                    $state = 6;
                } else {
                    break 2;
                }
                break;

            case 6:
                if ($char < '0' || $char > '9') {
                    // Just digits are valid characters.
                    break 2;
                }
                break;

            case 7:
                $flags |= Token::FLAG_NUMBER_BINARY;
                if ($char === '\'') {
                    $state = 8;
                } else {
                    break 2;
                }
                break;

            case 8:
                if ($char === '\'') {
                    $state = 9;
                } elseif ($char !== '0' && $char !== '1') {
                    break 2;
                }
                break;

            case 9:
                // The closing quote was already consumed; nothing follows.
                break 2;
        }

        $text .= $char;
    }

    $accepted = in_array($state, array(2, 3, 6, 9), true)
        || ($text !== '.' && $state === 4);

    if ($accepted) {
        // The loop stopped one position past the number.
        --$this->last;

        return new Token($text, Token::TYPE_NUMBER, $flags);
    }

    $this->last = $start;

    return null;
}
||
807 | |||
/**
 * Parses a string.
 *
 * The character at `$this->last` must either be accepted by
 * `Context::isString` or be equal to `$quote`; it then acts as the quote
 * that has to close the string. A doubled quote and, except inside
 * backticks, a backslash followed by any character are copied verbatim
 * and do not terminate the string.
 *
 * A missing closing quote is reported through `error()`, but the token
 * read so far is still returned.
 *
 * @param string $quote additional starting symbol
 *
 * @return Token the string token, or null when no string starts here
 */
public function parseString($quote = '')
{
    $opening = $this->str[$this->last];
    $flags = Context::isString($opening);

    if (!$flags && $opening !== $quote) {
        return null;
    }

    // From here on the opening character is the quote to look for.
    $quote = $opening;
    $text = $opening;
    $closed = false;

    while (++$this->last < $this->len) {
        $char = $this->str[$this->last];
        $hasNext = $this->last + 1 < $this->len;

        $doubledQuote = $hasNext
            && $char === $quote
            && $this->str[$this->last + 1] === $quote;
        $escape = $hasNext && $char === '\\' && $quote !== '`';

        if ($doubledQuote || $escape) {
            // Copy the pair as it is and skip over its second character.
            $text .= $char . $this->str[++$this->last];
            continue;
        }

        if ($char === $quote) {
            $closed = true;
            break;
        }

        $text .= $char;
    }

    if ($closed) {
        $text .= $this->str[$this->last];
    } else {
        $this->error(
            sprintf(
                Translator::gettext('Ending quote %1$s was expected.'),
                $quote
            ),
            '',
            $this->last
        );
    }

    return new Token($text, Token::TYPE_STRING, $flags);
}
|
854 | 163 | ||
/**
 * Parses a symbol.
 *
 * The character at `$this->last` must be accepted by `Context::isSymbol`.
 * For variables the leading `@` (or `@@` for system variables) is kept in
 * the token; for any other symbol the token starts empty. The name itself
 * is read either as a backtick-quoted string or, failing that, as an
 * unknown token. A missing name is reported through `error()`.
 *
 * @return Token the symbol token, or null when no symbol starts here
 */
public function parseSymbol()
{
    $token = $this->str[$this->last];
    if (!($flags = Context::isSymbol($token))) {
        return null;
    }

    if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
        // The pre-increment steps past the first `@` whenever another
        // character follows, whether or not that character is a second `@`.
        if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
            // This is a system variable (e.g. `@@hostname`).
            $token .= $this->str[$this->last++];
            $flags |= Token::FLAG_SYMBOL_SYSTEM;
        }
    } else {
        $token = '';
    }

    $str = null;

    if ($this->last < $this->len) {
        if (($str = $this->parseString('`')) === null) {
            // `parseUnknown` is an instance method, so it is invoked on
            // `$this` like `parseString` above.
            if (($str = $this->parseUnknown()) === null) {
                $this->error(
                    'Variable name was expected.',
                    $this->str[$this->last],
                    $this->last
                );
            }
        }
    }

    if ($str !== null) {
        $token .= $str->token;
    }

    return new Token($token, Token::TYPE_SYMBOL, $flags);
}
||
897 | |||
/**
 * Parses unknown parts of the query.
 *
 * Collects every character from `$this->last` up to (but excluding) the
 * next separator and leaves `$this->last` on the last collected character.
 *
 * @return Token a token without a specific type, or null when the current
 *               character is a separator
 */
public function parseUnknown()
{
    $word = $this->str[$this->last];

    if (Context::isSeparator($word)) {
        return null;
    }

    // Look ahead until the first separator (or the end of the input).
    for ($next = $this->last + 1; $next < $this->len; ++$next) {
        $char = $this->str[$next];
        if (Context::isSeparator($char)) {
            break;
        }
        $word .= $char;
    }

    // `$next` is one past the word; step back onto its last character.
    $this->last = $next - 1;

    return new Token($word);
}
||
917 | |||
918 | /** |
||
919 | * Parses the delimiter of the query. |
||
920 | * |
||
921 | * @return Token |
||
922 | */ |
||
923 | public function parseDelimiter() |
||
938 | } |
||
939 |
This check looks for @param annotations where the type inferred by our type inference engine differs from the declared type. It makes a suggestion as to what type it considers more descriptive.
Most often this is a case of a parameter that can be null in addition to its declared types.