|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace Fubhy\GraphQL\Language; |
|
4
|
|
|
|
|
5
|
|
|
class Lexer |
|
6
|
|
|
{ |
|
7
|
|
|
/** |
|
8
|
|
|
* @var \Fubhy\GraphQL\Language\Source |
|
9
|
|
|
*/ |
|
10
|
|
|
protected $source; |
|
11
|
|
|
|
|
12
|
|
|
/**
 * Creates a lexer bound to the given source.
 *
 * @param \Fubhy\GraphQL\Language\Source $source
 *   The source object whose body the other methods of this class read.
 */
public function __construct(Source $source)
{
    $this->source = $source;
}
|
18
|
|
|
|
|
19
|
|
|
/**
 * Skips ignorable characters and returns the position of the next lexable one.
 *
 * Starting at $start, advances past whitespace, commas and "#" comments. A
 * comment runs up to, but does not include, the next line terminator.
 *
 * @param int $start
 *   Character offset at which to begin scanning.
 *
 * @return int
 *   Offset of the first character that is neither whitespace, a comma nor part
 *   of a comment. Equals the source length when nothing lexable remains (or
 *   $start itself when $start is already at or past the end).
 */
protected function positionAfterWhitespace($start)
{
    $position = $start;
    $length = $this->source->getLength();

    // Bound the scan by the moving cursor rather than the fixed start offset,
    // so the loop never has to read past the end of the body to terminate.
    while ($position < $length) {
        $code = $this->charCodeAt($position);

        if (
            $code === 32 || // space
            $code === 44 || // comma
            $code === 160 || // '\xa0'
            $code === 0x2028 || // line separator
            $code === 0x2029 || // paragraph separator
            ($code > 8 && $code < 14) // tab, LF, VT, FF, CR
        ) {
            // Skip whitespace.
            ++$position;
        } elseif ($code === 35) { // #
            // Skip the comment body; the line terminator that ends it is
            // consumed as whitespace by the next iteration of the outer loop.
            ++$position;

            while (
                $position < $length &&
                ($code = $this->charCodeAt($position)) &&
                $code !== 10 && $code !== 13 && $code !== 0x2028 && $code !== 0x2029
            ) {
                ++$position;
            }
        } else {
            break;
        }
    }

    return $position;
}
|
64
|
|
|
|
|
65
|
|
|
/**
 * Reads the next token found at or after the given position.
 *
 * Leading whitespace, commas and comments are skipped first. When the end of
 * the source is reached an EOF token is returned.
 *
 * @param int $start
 *   Character offset at which to begin looking for a token.
 *
 * @return \Fubhy\GraphQL\Language\Token
 *
 * @throws \Exception
 *   When the character at the token position cannot start any token.
 */
public function readToken($start)
{
    $length = $this->source->getLength();
    $position = $this->positionAfterWhitespace($start);

    if ($position >= $length) {
        return new Token(Token::EOF_TYPE, $length, $length);
    }

    $code = $this->charCodeAt($position);

    // Punctuators made of exactly one character, keyed by character code.
    $punctuators = [
        33 => Token::BANG_TYPE, // !
        36 => Token::DOLLAR_TYPE, // $
        40 => Token::PAREN_L_TYPE, // (
        41 => Token::PAREN_R_TYPE, // )
        58 => Token::COLON_TYPE, // :
        61 => Token::EQUALS_TYPE, // =
        64 => Token::AT_TYPE, // @
        91 => Token::BRACKET_L_TYPE, // [
        93 => Token::BRACKET_R_TYPE, // ]
        123 => Token::BRACE_L_TYPE, // {
        124 => Token::PIPE_TYPE, // |
        125 => Token::BRACE_R_TYPE, // }
    ];

    if ($code !== null && isset($punctuators[$code])) {
        return new Token($punctuators[$code], $position, $position + 1);
    }

    if ($code === 46) { // .
        // A dot is only valid as part of the three-dot spread punctuator.
        $isSpread = $this->charCodeAt($position + 1) === 46
            && $this->charCodeAt($position + 2) === 46;

        if ($isSpread) {
            return new Token(Token::SPREAD_TYPE, $position, $position + 3);
        }
    } elseif (
        $code === 95 || // _
        ($code >= 65 && $code <= 90) || // A-Z
        ($code >= 97 && $code <= 122) // a-z
    ) {
        return $this->readName($position);
    } elseif (
        $code === 45 || // -
        ($code >= 48 && $code <= 57) // 0-9
    ) {
        return $this->readNumber($position, $code);
    } elseif ($code === 34) { // "
        return $this->readString($position);
    }

    // @todo Throw proper exception.
    throw new \Exception('Unexpected character.');
}
|
153
|
|
|
|
|
154
|
|
|
/**
 * Reads a number token from the source, either a float or an int depending
 * on whether a decimal point appears.
 *
 * Int:   -?(0|[1-9][0-9]*)
 * Float: -?(0|[1-9][0-9]*)\.[0-9]+([eE][+-]?[0-9]+)?
 *
 * An exponent is only recognised after a fractional part.
 *
 * @param int $start
 *   Character offset of the first character of the number.
 * @param int $code
 *   Character code found at $start (a minus sign or a digit).
 *
 * @return \Fubhy\GraphQL\Language\Token
 *   An INT or FLOAT token whose value is the matched source text.
 *
 * @throws \Exception
 *   When a required digit is missing.
 */
protected function readNumber($start, $code)
{
    $position = $start;
    $type = Token::INT_TYPE;

    if ($code === 45) { // -
        $code = $this->charCodeAt(++$position);
    }

    if ($code === 48) { // 0
        // A leading zero forms the whole integer part on its own.
        $code = $this->charCodeAt(++$position);
    } else {
        // Zero was handled above, so the run necessarily starts with 1-9.
        list($position, $code) = $this->readDigits($position, $code);
    }

    if ($code === 46) { // .
        $type = Token::FLOAT_TYPE;

        $code = $this->charCodeAt(++$position);
        list($position, $code) = $this->readDigits($position, $code);

        if ($code === 69 || $code === 101) { // E e
            $code = $this->charCodeAt(++$position);
            if ($code === 43 || $code === 45) { // + -
                $code = $this->charCodeAt(++$position);
            }
            list($position, $code) = $this->readDigits($position, $code);
        }
    }

    $body = $this->source->getBody();
    $value = mb_substr($body, $start, $position - $start, 'UTF-8');
    return new Token($type, $start, $position, $value);
}

/**
 * Consumes a run of one or more decimal digits.
 *
 * @param int $position
 *   Character offset of the first expected digit.
 * @param int $code
 *   Character code found at $position.
 *
 * @return array
 *   A two-element list: the offset just past the last digit, and the
 *   character code found at that offset.
 *
 * @throws \Exception
 *   When $code is not a digit.
 */
private function readDigits($position, $code)
{
    if (!($code >= 48 && $code <= 57)) { // 0 - 9
        // @todo Throw proper exception.
        throw new \Exception('Invalid number.');
    }

    do {
        $code = $this->charCodeAt(++$position);
    } while ($code >= 48 && $code <= 57); // 0 - 9

    return [$position, $code];
}
|
221
|
|
|
|
|
222
|
|
|
/**
 * Reads a double-quoted string token from the source.
 *
 * Escape sequences are decoded into the characters they denote: \" \/ \\
 * \b \f \n \r \t and \uXXXX (four hexadecimal digits).
 *
 * @param int $start
 *   Character offset of the opening double quote.
 *
 * @return \Fubhy\GraphQL\Language\Token
 *   A STRING token spanning both quotes, whose value is the decoded content.
 *
 * @throws \Exception
 *   When an escape sequence is invalid or the closing quote is missing.
 */
protected function readString($start)
{
    $position = $start + 1;
    $chunk = $position;
    $length = $this->source->getLength();
    $body = $this->source->getBody();
    $code = null;
    $value = '';

    while (
        $position < $length &&
        ($code = $this->charCodeAt($position)) &&
        $code !== 34 &&
        $code !== 10 && $code !== 13 && $code !== 0x2028 && $code !== 0x2029
    ) {
        ++$position;

        if ($code === 92) { // \
            // Flush the literal text collected since the previous escape.
            $value .= mb_substr($body, $chunk, $position - 1 - $chunk, 'UTF-8');

            // Kept separate from $code so that an escaped quote at the very
            // end of the input is not mistaken for the closing quote below.
            $escaped = $this->charCodeAt($position);

            switch ($escaped) {
                case 34: // "
                    $value .= '"';
                    break;
                case 47: // /
                    $value .= '/';
                    break;
                case 92: // \
                    $value .= '\\';
                    break;
                case 98: // b (backspace; PHP has no "\b" escape)
                    $value .= "\x08";
                    break;
                case 102: // f
                    $value .= "\f";
                    break;
                case 110: // n
                    $value .= "\n";
                    break;
                case 114: // r
                    $value .= "\r";
                    break;
                case 116: // t
                    $value .= "\t";
                    break;
                case 117: // u
                    $charCode = $this->uniCharCode(
                        $this->charCodeAt($position + 1),
                        $this->charCodeAt($position + 2),
                        $this->charCodeAt($position + 3),
                        $this->charCodeAt($position + 4)
                    );

                    if ($charCode < 0) {
                        // @todo Throw proper exception.
                        throw new \Exception('Bad character escape sequence.');
                    }

                    $value .= $this->fromCharCode($charCode);
                    $position += 4;
                    break;
                default:
                    // @todo Throw proper exception.
                    throw new \Exception('Bad character escape sequence.');
            }

            ++$position;
            $chunk = $position;
        }
    }

    if ($code !== 34) {
        // @todo Throw proper exception.
        throw new \Exception('Unterminated string.');
    }

    $value .= mb_substr($body, $chunk, $position - $chunk, 'UTF-8');
    return new Token(Token::STRING_TYPE, $start, $position + 1, $value);
}
|
309
|
|
|
|
|
310
|
|
|
/**
 * Reads a name token: a letter or underscore followed by any number of
 * letters, digits or underscores.
 *
 * [_A-Za-z][_0-9A-Za-z]*
 *
 * The character at $position is taken as the first character of the name
 * without being checked here.
 *
 * @param int $position
 *   Character offset of the first character of the name.
 *
 * @return \Fubhy\GraphQL\Language\Token
 *   A NAME token whose value is the matched source text.
 */
protected function readName($position)
{
    $length = $this->source->getLength();
    $body = $this->source->getBody();

    // Scan forward from the second character until a non-name character or
    // the end of the source is found.
    for ($end = $position + 1; $end < $length; ++$end) {
        $code = $this->charCodeAt($end);

        $isNameChar = $code === 95 // _
            || ($code >= 48 && $code <= 57) // 0-9
            || ($code >= 65 && $code <= 90) // A-Z
            || ($code >= 97 && $code <= 122); // a-z

        if (!$code || !$isNameChar) {
            break;
        }
    }

    $value = mb_substr($body, $position, $end - $position, 'UTF-8');
    return new Token(Token::NAME_TYPE, $position, $end, $value);
}
|
341
|
|
|
|
|
342
|
|
|
/**
 * Returns the Unicode code point of the character at the given offset of the
 * source body, in the spirit of JavaScript's String.prototype.charCodeAt.
 *
 * The offset counts UTF-8 characters, not bytes. An offset past the end of
 * the body produces an empty substring, which evaluates to 0.
 *
 * @param int $index
 *   Character offset into the source body.
 *
 * @return int|float|null
 *   The code point, or NULL when the extracted character is not valid UTF-8.
 */
protected function charCodeAt($index)
{
    $char = mb_substr($this->source->getBody(), $index, 1, 'UTF-8');

    if (!mb_check_encoding($char, 'UTF-8')) {
        return null;
    }

    // UTF-32BE stores the code point as a big-endian integer, so its hex dump
    // is the code point written in hexadecimal.
    $utf32 = mb_convert_encoding($char, 'UTF-32BE', 'UTF-8');

    return hexdec(bin2hex($utf32));
}
|
360
|
|
|
|
|
361
|
|
|
/**
 * Returns the UTF-8 encoded character for a Unicode code point, in the spirit
 * of JavaScript's String.fromCharCode.
 *
 * The code point is written as a big-endian 32-bit integer (that is, UTF-32BE)
 * and converted to UTF-8. This mirrors charCodeAt() and avoids mbstring's
 * 'HTML-ENTITIES' pseudo-encoding, which is deprecated as of PHP 8.2.
 *
 * @param int $code
 *   The code point to convert.
 *
 * @return string
 *   The character encoded as UTF-8.
 */
protected function fromCharCode($code)
{
    $code = intval($code);
    return mb_convert_encoding(pack('N', $code), 'UTF-8', 'UTF-32BE');
}
|
373
|
|
|
|
|
374
|
|
|
/**
 * Combines the character codes of four hexadecimal digits into the integer
 * they spell out, most significant digit first.
 *
 * For example, the codes of '0', '0', '0', 'f' (48, 48, 48, 102) give 15 and
 * the codes of '0', '0', 'f', 'f' give 255.
 *
 * char2hex() returns -1 for a code that is not a hexadecimal digit. Because
 * -1 has every bit set, ORing it into the result (shifted or not) leaves the
 * sign bit set, so any invalid digit makes the whole result negative.
 *
 * @param int $a
 *   Character code of the most significant digit.
 * @param int $b
 *   Character code of the second digit.
 * @param int $c
 *   Character code of the third digit.
 * @param int $d
 *   Character code of the least significant digit.
 *
 * @return int
 *   The combined value, or a negative number if any digit was invalid.
 */
protected function uniCharCode($a, $b, $c, $d)
{
    $nibble3 = $this->char2hex($a) << 12;
    $nibble2 = $this->char2hex($b) << 8;
    $nibble1 = $this->char2hex($c) << 4;
    $nibble0 = $this->char2hex($d);

    return $nibble3 | $nibble2 | $nibble1 | $nibble0;
}
|
395
|
|
|
|
|
396
|
|
|
/**
 * Converts the character code of a hexadecimal digit to its numeric value.
 *
 * Codes of '0'-'9' map to 0-9; codes of 'A'-'F' and 'a'-'f' map to 10-15.
 *
 * @param int $a
 *   Character code to convert.
 *
 * @return int
 *   The digit's value, or -1 when $a is not the code of a hexadecimal digit.
 */
protected function char2hex($a)
{
    if ($a >= 48 && $a <= 57) { // 0-9
        return $a - 48;
    }

    if ($a >= 65 && $a <= 70) { // A-F
        return $a - 55;
    }

    if ($a >= 97 && $a <= 102) { // a-f
        return $a - 87;
    }

    return -1;
}
|
415
|
|
|
} |
|
416
|
|
|
|
According to PSR-2, the body of a case statement must start on the line immediately following the case statement.
}
To learn more about the PSR-2 coding standard, please refer to PHP-FIG.