1 | <?php |
||||
2 | |||||
3 | declare(strict_types=1); |
||||
4 | |||||
5 | namespace GraphQL\Language; |
||||
6 | |||||
7 | use GraphQL\Error\SyntaxError; |
||||
8 | use GraphQL\Utils\BlockString; |
||||
9 | use GraphQL\Utils\Utils; |
||||
10 | use function chr; |
||||
11 | use function hexdec; |
||||
12 | use function mb_convert_encoding; |
||||
13 | use function ord; |
||||
14 | use function pack; |
||||
15 | use function preg_match; |
||||
16 | use function substr; |
||||
17 | |||||
18 | /** |
||||
19 | * A Lexer is a stateful stream generator in that every time |
||||
20 | * it is advanced, it returns the next token in the Source. Assuming the |
||||
21 | * source lexes, the final Token emitted by the lexer will be of kind |
||||
22 | * EOF, after which the lexer will repeatedly return the same EOF token |
||||
23 | * whenever called. |
||||
24 | * |
||||
25 | * Algorithm is O(N) both on memory and time |
||||
26 | */ |
||||
27 | class Lexer |
||||
28 | { |
||||
29 | private const TOKEN_BANG = 33; |
||||
30 | private const TOKEN_HASH = 35; |
||||
31 | private const TOKEN_DOLLAR = 36; |
||||
32 | private const TOKEN_AMP = 38; |
||||
33 | private const TOKEN_PAREN_L = 40; |
||||
34 | private const TOKEN_PAREN_R = 41; |
||||
35 | private const TOKEN_DOT = 46; |
||||
36 | private const TOKEN_COLON = 58; |
||||
37 | private const TOKEN_EQUALS = 61; |
||||
38 | private const TOKEN_AT = 64; |
||||
39 | private const TOKEN_BRACKET_L = 91; |
||||
40 | private const TOKEN_BRACKET_R = 93; |
||||
41 | private const TOKEN_BRACE_L = 123; |
||||
42 | private const TOKEN_PIPE = 124; |
||||
43 | private const TOKEN_BRACE_R = 125; |
||||
44 | |||||
45 | /** @var Source */ |
||||
46 | public $source; |
||||
47 | |||||
48 | /** @var bool[] */ |
||||
49 | public $options; |
||||
50 | |||||
51 | /** |
||||
52 | * The previously focused non-ignored token. |
||||
53 | * |
||||
54 | * @var Token |
||||
55 | */ |
||||
56 | public $lastToken; |
||||
57 | |||||
58 | /** |
||||
59 | * The currently focused non-ignored token. |
||||
60 | * |
||||
61 | * @var Token |
||||
62 | */ |
||||
63 | public $token; |
||||
64 | |||||
65 | /** |
||||
66 | * The (1-indexed) line containing the current token. |
||||
67 | * |
||||
68 | * @var int |
||||
69 | */ |
||||
70 | public $line; |
||||
71 | |||||
72 | /** |
||||
73 | * The character offset at which the current line begins. |
||||
74 | * |
||||
75 | * @var int |
||||
76 | */ |
||||
77 | public $lineStart; |
||||
78 | |||||
79 | /** |
||||
80 | * Current cursor position counted in UTF-8 characters (not bytes) of the source |
||||
81 | * |
||||
82 | * @var int |
||||
83 | */ |
||||
84 | private $position; |
||||
85 | |||||
86 | /** |
||||
87 | * Current cursor position counted in bytes of the source body |
||||
88 | * |
||||
89 | * @var int |
||||
90 | */ |
||||
91 | private $byteStreamPosition; |
||||
92 | |||||
/**
 * Creates a lexer positioned at the very beginning of the given source.
 *
 * The current and the previous token both start out as the same synthetic
 * start-of-file token; line counting starts at 1 and both cursors at 0.
 *
 * @param bool[] $options
 */
public function __construct(Source $source, array $options = [])
{
    $sof = new Token(Token::SOF, 0, 0, 0, 0, null);

    $this->source  = $source;
    $this->options = $options;

    $this->token     = $sof;
    $this->lastToken = $sof;

    $this->line      = 1;
    $this->lineStart = 0;

    $this->position           = 0;
    $this->byteStreamPosition = 0;
}
|||
108 | |||||
/**
 * Steps the lexer forward by one non-comment token.
 *
 * The token that was current becomes $lastToken; the result of lookahead()
 * becomes the current token and is returned.
 *
 * @return Token
 */
public function advance()
{
    $this->lastToken = $this->token;
    $this->token     = $this->lookahead();

    return $this->token;
}
||||
118 | |||||
/**
 * Returns the next non-comment token after the current one without
 * replacing $token.
 *
 * Tokens that have not been scanned yet are read from the source and cached
 * on the preceding token's $next link. Once the current token is EOF it is
 * returned as is.
 *
 * @return Token
 *
 * @throws SyntaxError
 */
public function lookahead()
{
    $candidate = $this->token;

    if ($candidate->kind === Token::EOF) {
        return $candidate;
    }

    do {
        if (! $candidate->next) {
            $candidate->next = $this->readToken($candidate);
        }

        $candidate = $candidate->next;
    } while ($candidate->kind === Token::COMMENT);

    return $candidate;
}
||||
130 | |||||
/**
 * Scans the token that follows $prev (comments included).
 *
 * Ignored characters are skipped first. The first remaining character then
 * selects the token kind: single-character punctuators are returned
 * directly, while comments, names, numbers and strings are delegated to a
 * dedicated reader after the cursor has been rewound to the token start.
 *
 * @return Token
 *
 * @throws SyntaxError
 */
private function readToken(Token $prev)
{
    $bodyLength = $this->source->length;

    $this->positionAfterWhitespace();

    $position = $this->position;
    $line     = $this->line;
    $col      = 1 + $position - $this->lineStart;

    if ($position >= $bodyLength) {
        return new Token(Token::EOF, $bodyLength, $bodyLength, $line, $col, $prev);
    }

    // Consume the first character of the token; the cursor now sits right after it.
    [, $code, $bytes] = $this->readChar(true);

    // Punctuators that always form a complete one-character token.
    $punctuators = [
        self::TOKEN_BANG      => Token::BANG,
        self::TOKEN_DOLLAR    => Token::DOLLAR,
        self::TOKEN_AMP       => Token::AMP,
        self::TOKEN_PAREN_L   => Token::PAREN_L,
        self::TOKEN_PAREN_R   => Token::PAREN_R,
        self::TOKEN_COLON     => Token::COLON,
        self::TOKEN_EQUALS    => Token::EQUALS,
        self::TOKEN_AT        => Token::AT,
        self::TOKEN_BRACKET_L => Token::BRACKET_L,
        self::TOKEN_BRACKET_R => Token::BRACKET_R,
        self::TOKEN_BRACE_L   => Token::BRACE_L,
        self::TOKEN_PIPE      => Token::PIPE,
        self::TOKEN_BRACE_R   => Token::BRACE_R,
    ];

    if ($code !== null && isset($punctuators[$code])) {
        return new Token($punctuators[$code], $position, $position + 1, $line, $col, $prev);
    }

    // #
    if ($code === self::TOKEN_HASH) {
        $this->moveStringCursor(-1, -1 * $bytes);

        return $this->readComment($line, $col, $prev);
    }

    // . (only valid as part of the three-dot spread)
    if ($code === self::TOKEN_DOT) {
        [, $secondCode] = $this->readChar(true);
        [, $thirdCode]  = $this->readChar(true);

        if ($secondCode === self::TOKEN_DOT && $thirdCode === self::TOKEN_DOT) {
            return new Token(Token::SPREAD, $position, $position + 3, $line, $col, $prev);
        }
    }

    // A-Z, _ or a-z
    $startsName = ($code >= 65 && $code <= 90)
        || $code === 95
        || ($code >= 97 && $code <= 122);

    if ($startsName) {
        return $this->moveStringCursor(-1, -1 * $bytes)
            ->readName($line, $col, $prev);
    }

    // - or 0-9
    $startsNumber = $code === 45 || ($code >= 48 && $code <= 57);

    if ($startsNumber) {
        return $this->moveStringCursor(-1, -1 * $bytes)
            ->readNumber($line, $col, $prev);
    }

    // "
    if ($code === 34) {
        // Peek at the two characters after the quote to tell """ from ".
        [, $nextCode]     = $this->readChar();
        [, $nextNextCode] = $this->moveStringCursor(1, 1)->readChar();

        // Rewind to the opening quote before handing over to the string reader.
        $this->moveStringCursor(-2, (-1 * $bytes) - 1);

        if ($nextCode === 34 && $nextNextCode === 34) {
            return $this->readBlockString($line, $col, $prev);
        }

        return $this->readString($line, $col, $prev);
    }

    throw new SyntaxError(
        $this->source,
        $position,
        $this->unexpectedCharacterMessage($code)
    );
}
||||
288 | |||||
/**
 * Builds the error message for a character that cannot start any token.
 *
 * @param int|null $code Code of the offending character.
 *
 * @return string
 */
private function unexpectedCharacterMessage($code)
{
    if ($code === 39) {
        return "Unexpected single quote character ('), did you mean to use " .
            'a double quote (")?';
    }

    // Tab, line feed and carriage return are the only control characters
    // that are not reported as invalid source characters.
    $isPermittedControl = $code === 0x0009 || $code === 0x000A || $code === 0x000D;

    if ($code < 0x0020 && ! $isPermittedControl) {
        return 'Cannot contain the invalid character ' . Utils::printCharCode($code);
    }

    return 'Cannot parse the unexpected character ' . Utils::printCharCode($code) . '.';
}
||||
303 | |||||
/**
 * Reads a name token made of letters, digits and underscores, starting at
 * the current cursor position.
 *
 * [_A-Za-z][_0-9A-Za-z]*
 *
 * @param int $line
 * @param int $col
 *
 * @return Token
 */
private function readName($line, $col, Token $prev)
{
    $start = $this->position;
    $name  = '';

    [$char, $code] = $this->readChar();

    while (true) {
        $isNameChar = $code === 95                 // _
            || ($code >= 48 && $code <= 57)        // 0-9
            || ($code >= 65 && $code <= 90)        // A-Z
            || ($code >= 97 && $code <= 122);      // a-z

        if ($code === null || ! $isNameChar) {
            break;
        }

        $name .= $char;

        // Every name character is a single byte.
        $this->moveStringCursor(1, 1);
        [$char, $code] = $this->readChar();
    }

    return new Token(Token::NAME, $start, $this->position, $line, $col, $prev, $name);
}
||||
340 | |||||
/**
 * Reads a numeric token starting at the current cursor position. The token
 * is a float when a fractional part or an exponent is present, otherwise an
 * int.
 *
 * Int:   -?(0|[1-9][0-9]*)
 * Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
 *
 * @param int $line
 * @param int $col
 *
 * @return Token
 *
 * @throws SyntaxError
 */
private function readNumber($line, $col, Token $prev)
{
    $start  = $this->position;
    $number = '';
    $kind   = Token::INT;

    [$char, $code] = $this->readChar();

    // Optional sign
    if ($code === 45) { // -
        $number       .= $char;
        [$char, $code] = $this->moveStringCursor(1, 1)->readChar();
    }

    // Integer part
    if ($code !== 48) {
        $number       .= $this->readDigits();
        [$char, $code] = $this->readChar();
    } else {
        // A leading zero must not be followed by another digit.
        $number       .= $char;
        [$char, $code] = $this->moveStringCursor(1, 1)->readChar();

        if ($code >= 48 && $code <= 57) {
            throw new SyntaxError(
                $this->source,
                $this->position,
                'Invalid number, unexpected digit after 0: ' . Utils::printCharCode($code)
            );
        }
    }

    // Fractional part
    if ($code === 46) { // .
        $kind = Token::FLOAT;
        $this->moveStringCursor(1, 1);

        $number       .= $char;
        $number       .= $this->readDigits();
        [$char, $code] = $this->readChar();
    }

    // Exponent
    if ($code === 69 || $code === 101) { // E e
        $kind          = Token::FLOAT;
        $number       .= $char;
        [$char, $code] = $this->moveStringCursor(1, 1)->readChar();

        if ($code === 43 || $code === 45) { // + -
            $number .= $char;
            $this->moveStringCursor(1, 1);
        }

        $number .= $this->readDigits();
    }

    return new Token($kind, $start, $this->position, $line, $col, $prev, $number);
}
||||
416 | |||||
/**
 * Consumes a run of one or more decimal digits and leaves the cursor on the
 * first character after the run.
 *
 * @return string The digits that were read.
 *
 * @throws SyntaxError When the character at the cursor is not a digit.
 */
private function readDigits()
{
    [$char, $code] = $this->readChar();

    if (! ($code >= 48 && $code <= 57)) { // not 0 - 9
        // Past the last character the offending "character" is reported as null.
        if ($this->position > $this->source->length - 1) {
            $code = null;
        }

        throw new SyntaxError(
            $this->source,
            $this->position,
            'Invalid number, expected digit but got: ' . Utils::printCharCode($code)
        );
    }

    $digits = '';

    while ($code >= 48 && $code <= 57) { // 0 - 9
        $digits .= $char;

        $this->moveStringCursor(1, 1);
        [$char, $code] = $this->readChar();
    }

    return $digits;
}
||||
445 | |||||
/**
 * Reads a single-line string token ("...") and decodes its escape sequences.
 *
 * The cursor is expected to sit on the opening quote. Characters are
 * collected until the closing quote; reaching a line feed, a carriage return
 * or the end of the source first is reported as an unterminated string.
 *
 * @param int $line
 * @param int $col
 *
 * @return Token
 *
 * @throws SyntaxError On an invalid character, an invalid escape sequence or
 *                     an unterminated string.
 */
private function readString($line, $col, Token $prev)
{
    $start = $this->position;

    // Skip leading quote and read first string char:
    [$char, $code, $bytes] = $this->moveStringCursor(1, 1)->readChar();

    // $chunk collects raw characters since the last escape sequence,
    // $value holds the decoded string built so far.
    $chunk = '';
    $value = '';

    while ($code !== null &&
        // not LineTerminator
        $code !== 10 && $code !== 13
    ) {
        // Closing Quote (")
        if ($code === 34) {
            $value .= $chunk;

            // Skip quote
            $this->moveStringCursor(1, 1);

            return new Token(
                Token::STRING,
                $start,
                $this->position,
                $line,
                $col,
                $prev,
                $value
            );
        }

        $this->assertValidStringCharacterCode($code, $this->position);
        $this->moveStringCursor(1, $bytes);

        if ($code === 92) { // \
            $value   .= $chunk;
            [, $code] = $this->readChar(true);

            switch ($code) {
                case 34:
                    $value .= '"';
                    break;
                case 47:
                    $value .= '/';
                    break;
                case 92:
                    $value .= '\\';
                    break;
                case 98:
                    $value .= chr(8);
                    break; // \b (backspace)
                case 102:
                    $value .= "\f";
                    break;
                case 110:
                    $value .= "\n";
                    break;
                case 114:
                    $value .= "\r";
                    break;
                case 116:
                    $value .= "\t";
                    break;
                case 117:
                    $position = $this->position;
                    [$hex]    = $this->readChars(4, true);

                    // The pattern is anchored on both ends: four characters read
                    // from malformed UTF-8 can span more than four bytes, and an
                    // unanchored match would accept such input as long as it
                    // contains four hex digits somewhere.
                    if (! preg_match('/\A[0-9a-fA-F]{4}\z/', $hex)) {
                        throw new SyntaxError(
                            $this->source,
                            $position - 1,
                            'Invalid character escape sequence: \\u' . $hex
                        );
                    }

                    $code = hexdec($hex);

                    // UTF-16 surrogate pair detection and handling.
                    $highOrderByte = $code >> 8;
                    if (0xD8 <= $highOrderByte && $highOrderByte <= 0xDF) {
                        [$utf16Continuation] = $this->readChars(6, true);
                        if (! preg_match('/^\\\u[0-9a-fA-F]{4}$/', $utf16Continuation)) {
                            throw new SyntaxError(
                                $this->source,
                                $this->position - 5,
                                'Invalid UTF-16 trailing surrogate: ' . $utf16Continuation
                            );
                        }
                        $surrogatePairHex = $hex . substr($utf16Continuation, 2, 4);
                        $value           .= mb_convert_encoding(pack('H*', $surrogatePairHex), 'UTF-8', 'UTF-16');
                        break;
                    }

                    $this->assertValidStringCharacterCode($code, $position - 2);

                    $value .= Utils::chr($code);
                    break;
                default:
                    throw new SyntaxError(
                        $this->source,
                        $this->position - 1,
                        'Invalid character escape sequence: \\' . Utils::chr($code)
                    );
            }
            $chunk = '';
        } else {
            $chunk .= $char;
        }

        [$char, $code, $bytes] = $this->readChar();
    }

    throw new SyntaxError(
        $this->source,
        $this->position,
        'Unterminated string.'
    );
}
||||
572 | |||||
/**
 * Reads a block string token from the source file.
 *
 * """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
 *
 * The cursor is expected to sit on the first of the three opening quotes.
 * Line terminators inside the block string advance $line and $lineStart so
 * that tokens following a multi-line block string get correct coordinates.
 *
 * @param int $line
 * @param int $col
 *
 * @return Token
 *
 * @throws SyntaxError On an invalid character or an unterminated string.
 */
private function readBlockString($line, $col, Token $prev)
{
    $start = $this->position;

    // Skip leading quotes and read first string char:
    [$char, $code, $bytes] = $this->moveStringCursor(3, 3)->readChar();

    // $chunk collects raw characters since the last escaped triple-quote,
    // $value holds the raw block string content built so far.
    $chunk = '';
    $value = '';

    while ($code !== null) {
        // Closing Triple-Quote (""")
        if ($code === 34) {
            // Move 2 quotes
            [, $nextCode]     = $this->moveStringCursor(1, 1)->readChar();
            [, $nextNextCode] = $this->moveStringCursor(1, 1)->readChar();

            if ($nextCode === 34 && $nextNextCode === 34) {
                $value .= $chunk;

                $this->moveStringCursor(1, 1);

                return new Token(
                    Token::BLOCK_STRING,
                    $start,
                    $this->position,
                    $line,
                    $col,
                    $prev,
                    BlockString::value($value)
                );
            }

            // move cursor back to before the first quote
            $this->moveStringCursor(-2, -2);
        }

        $this->assertValidBlockStringCharacterCode($code, $this->position);
        $this->moveStringCursor(1, $bytes);

        if ($code === 92) { // \
            // Only a backslash can start the escaped triple-quote, so the
            // three-character look-ahead is needed here and nowhere else.
            [, $nextCode]         = $this->readChar();
            [, $nextNextCode]     = $this->moveStringCursor(1, 1)->readChar();
            [, $nextNextNextCode] = $this->moveStringCursor(1, 1)->readChar();

            // Escape Triple-Quote (\""")
            if ($nextCode === 34 && $nextNextCode === 34 && $nextNextNextCode === 34) {
                $this->moveStringCursor(1, 1);
                $value .= $chunk . '"""';
                $chunk  = '';
            } else {
                $this->moveStringCursor(-2, -2);
                $chunk .= $char;
            }
        } else {
            $chunk .= $char;

            if ($code === 10) { // new line
                $this->line++;
                $this->lineStart = $this->position;
            } elseif ($code === 13) { // carriage return
                // For CR LF the line is counted when the LF is consumed.
                [, $afterCr] = $this->readChar();
                if ($afterCr !== 10) {
                    $this->line++;
                    $this->lineStart = $this->position;
                }
            }
        }

        [$char, $code, $bytes] = $this->readChar();
    }

    throw new SyntaxError(
        $this->source,
        $this->position,
        'Unterminated string.'
    );
}
||||
645 | |||||
/**
 * Ensures that a character may appear inside a single-line string: any code
 * from 0x0020 upwards, or a tab.
 *
 * @param int|null $code     Code of the character to check.
 * @param int      $position Position reported in the error.
 *
 * @throws SyntaxError
 */
private function assertValidStringCharacterCode($code, $position)
{
    // SourceCharacter
    if ($code >= 0x0020 || $code === 0x0009) {
        return;
    }

    throw new SyntaxError(
        $this->source,
        $position,
        'Invalid character within String: ' . Utils::printCharCode($code)
    );
}
|||
657 | |||||
/**
 * Ensures that a character may appear inside a block string: any code from
 * 0x0020 upwards, or a tab, line feed or carriage return.
 *
 * @param int|null $code     Code of the character to check.
 * @param int      $position Position reported in the error.
 *
 * @throws SyntaxError
 */
private function assertValidBlockStringCharacterCode($code, $position)
{
    // SourceCharacter
    $isPermittedControl = $code === 0x0009 || $code === 0x000A || $code === 0x000D;

    if ($code >= 0x0020 || $isPermittedControl) {
        return;
    }

    throw new SyntaxError(
        $this->source,
        $position,
        'Invalid character within String: ' . Utils::printCharCode($code)
    );
}
|||
669 | |||||
/**
 * Advances the cursor past ignored characters (tab, space, comma, BOM and
 * line terminators) and stops on the first character that is none of these,
 * or at the end of the source.
 *
 * Every line terminator that is skipped increments $line and moves
 * $lineStart to the position right after it; CR LF counts as one line.
 */
private function positionAfterWhitespace()
{
    while ($this->position < $this->source->length) {
        [, $code, $bytes] = $this->readChar();

        // tab | space | comma | BOM
        $isInlineIgnored = $code === 9 || $code === 32 || $code === 44 || $code === 0xFEFF;

        if ($isInlineIgnored) {
            $this->moveStringCursor(1, $bytes);
            continue;
        }

        if ($code === 10) { // new line
            $this->moveStringCursor(1, $bytes);
            $this->line++;
            $this->lineStart = $this->position;
            continue;
        }

        if ($code !== 13) {
            // First significant character reached.
            return;
        }

        // carriage return, optionally followed by a line feed
        $this->moveStringCursor(1, $bytes);
        [, $nextCode, $nextBytes] = $this->readChar();

        if ($nextCode === 10) { // lf after cr
            $this->moveStringCursor(1, $nextBytes);
        }

        $this->line++;
        $this->lineStart = $this->position;
    }
}
|||
700 | |||||
/**
 * Reads a comment token from the source file.
 *
 * #[\u0009\u0020-\uFFFF]*
 *
 * The cursor is expected to sit on the leading '#'. The token value is the
 * comment text after the '#'; the character that ends the comment (a line
 * terminator or another control character) is left unconsumed and is not
 * part of the value, so the value matches the token's start/end span.
 *
 * @param int $line
 * @param int $col
 *
 * @return Token
 */
private function readComment($line, $col, Token $prev)
{
    $start = $this->position;
    $value = '';

    // Step over the '#' (a single byte) and look at the first comment character.
    [$char, $code, $bytes] = $this->moveStringCursor(1, 1)->readChar();

    // SourceCharacter but not LineTerminator
    while ($code !== null && ($code > 0x001F || $code === 0x0009)) {
        $value .= $char;

        [$char, $code, $bytes] = $this->moveStringCursor(1, $bytes)->readChar();
    }

    return new Token(
        Token::COMMENT,
        $start,
        $this->position,
        $line,
        $col,
        $prev,
        $value
    );
}
||||
735 | |||||
/**
 * Reads the UTF-8 character that starts at the given byte offset of the
 * source body (by default at the current byte cursor).
 *
 * The width of the character is derived from its first byte only. When no
 * byte exists at the offset, an empty string, a null code and a width of 0
 * are returned.
 *
 * @param bool     $advance            Whether to move the cursor past the character that was read.
 * @param int|null $byteStreamPosition Byte offset to read from; null means the current byte cursor.
 *
 * @return mixed[] Triple of [string $char, int|null $code, int $bytes].
 */
private function readChar($advance = false, $byteStreamPosition = null)
{
    if ($byteStreamPosition === null) {
        $byteStreamPosition = $this->byteStreamPosition;
    }

    $code           = null;
    $utf8char       = '';
    $bytes          = 0;
    $positionOffset = 0;

    if (isset($this->source->body[$byteStreamPosition])) {
        $ord = ord($this->source->body[$byteStreamPosition]);

        if ($ord < 128) {
            $bytes = 1;
        } elseif ($ord < 224) {
            $bytes = 2;
        } elseif ($ord < 240) {
            $bytes = 3;
        } else {
            $bytes = 4;
        }

        if ($bytes === 1) {
            $utf8char = $this->source->body[$byteStreamPosition];
            $code     = $ord;
        } else {
            // substr() returns whatever bytes are available, so a multi-byte
            // sequence truncated at the end of the body does not trigger an
            // "Uninitialized string offset" error.
            $utf8char = substr($this->source->body, $byteStreamPosition, $bytes);
            $code     = Utils::ord($utf8char);
        }

        $positionOffset = 1;
    }

    if ($advance) {
        $this->moveStringCursor($positionOffset, $bytes);
    }

    return [$utf8char, $code, $bytes];
}
||||
782 | |||||
/**
 * Reads the next $charCount UTF-8 characters from the byte stream, starting
 * at $byteStreamPosition (by default at the current byte cursor).
 *
 * Fewer characters are returned when the end of the body is reached first.
 *
 * @param int      $charCount          Number of characters to read.
 * @param bool     $advance            Whether to move the cursor by $charCount characters and the bytes read.
 * @param int|null $byteStreamPosition Byte offset to read from; null means the current byte cursor.
 *
 * @return (string|int)[] Pair of [string $chars, int $totalBytes].
 */
private function readChars($charCount, $advance = false, $byteStreamPosition = null)
{
    $result     = '';
    $totalBytes = 0;

    // "??" rather than "?:" so that an explicit offset of 0 is honoured.
    $byteOffset = $byteStreamPosition ?? $this->byteStreamPosition;

    for ($i = 0; $i < $charCount; $i++) {
        [$char, , $bytes] = $this->readChar(false, $byteOffset);

        $totalBytes += $bytes;
        $byteOffset += $bytes;
        $result     .= $char;
    }

    if ($advance) {
        $this->moveStringCursor($charCount, $totalBytes);
    }

    return [$result, $totalBytes];
}
||||
810 | |||||
/**
 * Shifts both cursors: the character position by $positionOffset and the
 * byte position by $byteStreamOffset. Negative offsets move backwards.
 *
 * @param int $positionOffset
 * @param int $byteStreamOffset
 *
 * @return self The lexer itself, so that calls can be chained.
 */
private function moveStringCursor($positionOffset, $byteStreamOffset)
{
    $this->byteStreamPosition = $this->byteStreamPosition + $byteStreamOffset;
    $this->position           = $this->position + $positionOffset;

    return $this;
}
||||
826 | } |
||||
827 |