1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Fubhy\GraphQL\Language; |
4
|
|
|
|
5
|
|
|
/**
 * Splits a GraphQL source body into tokens.
 *
 * The lexer is stateless between calls: every call to readToken() receives the
 * position to start from and returns a single Token describing what was found
 * there. All positions are character offsets as understood by mb_substr() with
 * the UTF-8 encoding.
 */
class Lexer
{
    /**
     * The source whose body is being tokenized.
     *
     * @var \Fubhy\GraphQL\Language\Source
     */
    protected $source;

    /**
     * @param \Fubhy\GraphQL\Language\Source $source
     *   The source to read tokens from.
     */
    public function __construct(Source $source)
    {
        $this->source = $source;
    }

    /**
     * Finds the first position at or after $start that is worth lexing.
     *
     * Skips whitespace, commas and '#' comments (which run until the next line
     * terminator) and returns the position of the first character that is
     * neither.
     *
     * @param int $start
     *   The position to start scanning from.
     *
     * @return int
     *   The position of the next significant character, or the source length
     *   if only ignorable characters remain.
     */
    protected function positionAfterWhitespace($start)
    {
        $position = $start;
        $length = $this->source->getLength();

        // The guard must follow the moving cursor, not the fixed start offset,
        // so that the scan never reads past the end of the body.
        while ($position < $length) {
            $code = $this->charCodeAt($position);

            if (
                $code === 32 || // space
                $code === 44 || // comma
                $code === 160 || // '\xa0'
                $code === 0x2028 || // line separator
                $code === 0x2029 || // paragraph separator
                ($code > 8 && $code < 14) // tab, LF, VT, FF, CR
            ) {
                ++$position;
                continue;
            }

            if ($code !== 35) { // anything but '#'
                break;
            }

            // Skip the comment up to (not including) the line terminator; the
            // terminator itself is consumed as whitespace on the next pass.
            ++$position;
            while (
                $position < $length &&
                ($code = $this->charCodeAt($position)) &&
                $code !== 10 && $code !== 13 && $code !== 0x2028 && $code !== 0x2029
            ) {
                ++$position;
            }
        }

        return $position;
    }

    /**
     * Reads the next token at or after the given position.
     *
     * @param int $start
     *   The position to start reading from; leading whitespace and comments
     *   are skipped first.
     *
     * @return \Fubhy\GraphQL\Language\Token
     *   The token found, or an EOF token when the end of the body is reached.
     *
     * @throws \Exception
     *   If the character at the token position cannot start any token.
     */
    public function readToken($start)
    {
        $length = $this->source->getLength();
        $position = $this->positionAfterWhitespace($start);

        if ($position >= $length) {
            return new Token(Token::EOF_TYPE, $length, $length);
        }

        $code = $this->charCodeAt($position);

        switch ($code) {
            case 33: // !
                return new Token(Token::BANG_TYPE, $position, $position + 1);
            case 36: // $
                return new Token(Token::DOLLAR_TYPE, $position, $position + 1);
            case 40: // (
                return new Token(Token::PAREN_L_TYPE, $position, $position + 1);
            case 41: // )
                return new Token(Token::PAREN_R_TYPE, $position, $position + 1);
            case 46: // .
                // Only the three-dot spread is a token; a lone dot is an error.
                if ($this->charCodeAt($position + 1) === 46 && $this->charCodeAt($position + 2) === 46) {
                    return new Token(Token::SPREAD_TYPE, $position, $position + 3);
                }
                break;
            case 58: // :
                return new Token(Token::COLON_TYPE, $position, $position + 1);
            case 61: // =
                return new Token(Token::EQUALS_TYPE, $position, $position + 1);
            case 64: // @
                return new Token(Token::AT_TYPE, $position, $position + 1);
            case 91: // [
                return new Token(Token::BRACKET_L_TYPE, $position, $position + 1);
            case 93: // ]
                return new Token(Token::BRACKET_R_TYPE, $position, $position + 1);
            case 123: // {
                return new Token(Token::BRACE_L_TYPE, $position, $position + 1);
            case 124: // |
                return new Token(Token::PIPE_TYPE, $position, $position + 1);
            case 125: // }
                return new Token(Token::BRACE_R_TYPE, $position, $position + 1);
            case 34: // "
                return $this->readString($position);
            case 45: // -
                return $this->readNumber($position, $code);
        }

        if ($code >= 48 && $code <= 57) { // 0-9
            return $this->readNumber($position, $code);
        }

        if (
            $code === 95 || // _
            ($code >= 65 && $code <= 90) || // A-Z
            ($code >= 97 && $code <= 122) // a-z
        ) {
            return $this->readName($position);
        }

        // @todo Throw proper exception.
        throw new \Exception('Unexpected character.');
    }

    /**
     * Reads a number token, either an int or a float.
     *
     * The token is a float when a decimal point follows the integer part. An
     * exponent is only recognized after a fractional part.
     *
     * Int:   -?(0|[1-9][0-9]*)
     * Float: -?(0|[1-9][0-9]*)\.[0-9]+([eE][+-]?[0-9]+)?
     *
     * @param int $start
     *   The position of the first character of the number.
     * @param int $code
     *   The character code found at $start.
     *
     * @return \Fubhy\GraphQL\Language\Token
     *   An INT or FLOAT token whose value is the raw number text.
     *
     * @throws \Exception
     *   If a required digit is missing.
     */
    protected function readNumber($start, $code)
    {
        $position = $start;
        $type = Token::INT_TYPE;

        if ($code === 45) { // -
            $code = $this->charCodeAt(++$position);
        }

        if ($code === 48) { // 0
            // A leading zero stands alone; following digits are not consumed.
            ++$position;
        } else {
            $position = $this->skipDigits($position, $code);
        }
        $code = $this->charCodeAt($position);

        if ($code === 46) { // .
            $type = Token::FLOAT_TYPE;

            ++$position;
            $position = $this->skipDigits($position, $this->charCodeAt($position));
            $code = $this->charCodeAt($position);

            if ($code === 69 || $code === 101) { // E e
                $code = $this->charCodeAt(++$position);
                if ($code === 43 || $code === 45) { // + -
                    $code = $this->charCodeAt(++$position);
                }
                $position = $this->skipDigits($position, $code);
            }
        }

        $body = $this->source->getBody();
        $value = mb_substr($body, $start, $position - $start, 'UTF-8');

        return new Token($type, $start, $position, $value);
    }

    /**
     * Consumes a run of one or more decimal digits.
     *
     * @param int $position
     *   The position of the first expected digit.
     * @param int $code
     *   The character code found at $position.
     *
     * @return int
     *   The position of the first character after the digits.
     *
     * @throws \Exception
     *   If the character at $position is not a digit.
     */
    private function skipDigits($position, $code)
    {
        if (!($code >= 48 && $code <= 57)) { // not 0-9
            // @todo Throw proper exception.
            throw new \Exception('Invalid number.');
        }

        do {
            $code = $this->charCodeAt(++$position);
        } while ($code >= 48 && $code <= 57); // 0-9

        return $position;
    }

    /**
     * Reads a double-quoted string token and decodes its escape sequences.
     *
     * @param int $start
     *   The position of the opening quote.
     *
     * @return \Fubhy\GraphQL\Language\Token
     *   A STRING token whose value is the decoded string content.
     *
     * @throws \Exception
     *   If an escape sequence is invalid or the closing quote is missing.
     */
    protected function readString($start)
    {
        $position = $start + 1;
        // Start of the pending run of literal characters not yet appended.
        $chunk = $position;
        $length = $this->source->getLength();
        $body = $this->source->getBody();
        $value = '';

        while (
            $position < $length &&
            ($code = $this->charCodeAt($position)) &&
            $code !== 34 &&
            $code !== 10 && $code !== 13 && $code !== 0x2028 && $code !== 0x2029
        ) {
            ++$position;

            if ($code !== 92) { // not a backslash
                continue;
            }

            // Flush the literal run that precedes the backslash.
            $value .= mb_substr($body, $chunk, $position - 1 - $chunk, 'UTF-8');
            $code = $this->charCodeAt($position);

            // The decoded characters must be real control characters, so the
            // double-quoted forms are required here; single-quoted '\n' would
            // append a backslash followed by the letter.
            switch ($code) {
                case 34: // "
                    $value .= '"';
                    break;
                case 47: // /
                    $value .= '/';
                    break;
                case 92: // \
                    $value .= '\\';
                    break;
                case 98: // b
                    $value .= "\x08";
                    break;
                case 102: // f
                    $value .= "\x0C";
                    break;
                case 110: // n
                    $value .= "\n";
                    break;
                case 114: // r
                    $value .= "\r";
                    break;
                case 116: // t
                    $value .= "\t";
                    break;
                case 117: // u followed by four hex digits
                    $charCode = $this->uniCharCode(
                        $this->charCodeAt($position + 1),
                        $this->charCodeAt($position + 2),
                        $this->charCodeAt($position + 3),
                        $this->charCodeAt($position + 4)
                    );

                    if ($charCode < 0) {
                        // @todo Throw proper exception.
                        throw new \Exception('Bad character escape sequence.');
                    }

                    $value .= $this->fromCharCode($charCode);
                    $position += 4;
                    break;
                default:
                    // @todo Throw proper exception.
                    throw new \Exception('Bad character escape sequence.');
            }

            ++$position;
            $chunk = $position;
        }

        // Inspect the character the scan stopped on rather than a stale code:
        // after an escaped quote at the very end of the body the last code
        // read is 34 even though no closing quote exists.
        if ($position >= $length || $this->charCodeAt($position) !== 34) {
            // @todo Throw proper exception.
            throw new \Exception('Unterminated string.');
        }

        $value .= mb_substr($body, $chunk, $position - $chunk, 'UTF-8');

        return new Token(Token::STRING_TYPE, $start, $position + 1, $value);
    }

    /**
     * Reads an alphanumeric + underscore name from the source.
     *
     * [_A-Za-z][_0-9A-Za-z]*
     *
     * @param int $position
     *   The position of the first character of the name; the caller has
     *   already checked that it is a valid name start.
     *
     * @return \Fubhy\GraphQL\Language\Token
     *   A NAME token whose value is the name text.
     */
    protected function readName($position)
    {
        $end = $position + 1;
        $length = $this->source->getLength();

        while ($end < $length) {
            $code = $this->charCodeAt($end);

            $isNameChar =
                $code === 95 || // _
                ($code >= 48 && $code <= 57) || // 0-9
                ($code >= 65 && $code <= 90) || // A-Z
                ($code >= 97 && $code <= 122); // a-z

            if (!$isNameChar) {
                break;
            }

            ++$end;
        }

        $value = mb_substr($this->source->getBody(), $position, $end - $position, 'UTF-8');

        return new Token(Token::NAME_TYPE, $position, $end, $value);
    }

    /**
     * Returns the Unicode code point of the character at the given position.
     *
     * Modelled on JavaScript's String.prototype.charCodeAt, except that the
     * result is a full code point rather than a UTF-16 code unit.
     *
     * @param int $index
     *   The character offset into the source body.
     *
     * @return null|number
     *   The code point, 0 when $index is past the end of the body, or null
     *   when the extracted character is not valid UTF-8.
     */
    protected function charCodeAt($index)
    {
        $char = mb_substr($this->source->getBody(), $index, 1, 'UTF-8');

        if ($char === '') {
            // Nothing at this offset; callers treat 0 as "no character".
            return 0;
        }

        if (!mb_check_encoding($char, 'UTF-8')) {
            return null;
        }

        // UTF-32BE is the code point as a 4-byte big-endian integer.
        return hexdec(bin2hex(mb_convert_encoding($char, 'UTF-32BE', 'UTF-8')));
    }

    /**
     * Returns the UTF-8 string for a Unicode code point.
     *
     * Modelled on JavaScript's String.fromCharCode.
     *
     * @param int $code
     *   The code point to encode.
     *
     * @return string
     *   The UTF-8 encoded character.
     */
    protected function fromCharCode($code)
    {
        // Mirror of charCodeAt(): build the 4-byte big-endian code point and
        // convert it to UTF-8. This avoids the 'HTML-ENTITIES' pseudo-encoding,
        // which mbstring has deprecated.
        return mb_convert_encoding(pack('N', intval($code)), 'UTF-8', 'UTF-32BE');
    }

    /**
     * Combines the character codes of four hex digits into one integer.
     *
     * For example the codes of '0','0','0','f' yield 15 and those of
     * '0','0','f','f' yield 255.
     *
     * char2hex() returns -1 for an invalid digit, and ORing -1 (shifted or
     * not) into the result keeps it negative, so a negative return value
     * signals that at least one digit was invalid.
     *
     * @param int $a
     *   Character code of the most significant hex digit.
     * @param int $b
     *   Character code of the second hex digit.
     * @param int $c
     *   Character code of the third hex digit.
     * @param int $d
     *   Character code of the least significant hex digit.
     *
     * @return int
     *   The combined value, or a negative number on error.
     */
    protected function uniCharCode($a, $b, $c, $d)
    {
        return ($this->char2hex($a) << 12)
            | ($this->char2hex($b) << 8)
            | ($this->char2hex($c) << 4)
            | $this->char2hex($d);
    }

    /**
     * Converts the character code of a hex digit to its integer value.
     *
     * '0' becomes 0, '9' becomes 9
     * 'A' becomes 10, 'F' becomes 15
     * 'a' becomes 10, 'f' becomes 15
     *
     * @param int $a
     *   The character code to convert.
     *
     * @return int
     *   The digit value (0-15), or -1 if the code is not a hex digit.
     */
    protected function char2hex($a)
    {
        if ($a >= 48 && $a <= 57) { // 0-9
            return $a - 48;
        }

        if ($a >= 65 && $a <= 70) { // A-F
            return $a - 55;
        }

        if ($a >= 97 && $a <= 102) { // a-f
            return $a - 87;
        }

        return -1;
    }
}
416
|
|
|
|
According to the PSR-2, the body of a case statement must start on the line immediately following the case statement.
}
To learn more about the PSR-2 coding standard, please refer to the PHP-Fig.