1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Graze\Morphism\Parse; |
4
|
|
|
|
5
|
|
|
use Exception; |
6
|
|
|
use Graze\Morphism\Test\Parse\TestCase; |
7
|
|
|
use LogicException; |
8
|
|
|
use ReflectionClass; |
9
|
|
|
use ReflectionException; |
10
|
|
|
use RuntimeException; |
11
|
|
|
|
12
|
|
|
/**
 * Tests for TokenStream: construction from text and files, tokenisation via
 * nextToken(), mark/rewind, consume()/peek(), the expect*() helpers,
 * conditional comments and error-message contextualisation.
 *
 * makeStream() is not defined in this class; it is presumably inherited from
 * the project's TestCase base class.
 */
class TokenStreamTest extends TestCase
{
    /**
     * newFromText() with empty arguments still yields a TokenStream instance.
     */
    public function testNewFromText()
    {
        $stream = TokenStream::newFromText('', '');
        $this->assertInstanceOf(TokenStream::class, $stream);
    }

    /**
     * newFromFile() on a readable path yields a TokenStream instance.
     */
    public function testNewFromFile()
    {
        $stream = TokenStream::newFromFile("/dev/null");
        $this->assertInstanceOf(TokenStream::class, $stream);
    }

    /**
     * newFromFile() on a path that does not exist must throw.
     */
    public function testNewFromFileNotFound()
    {
        // expectException() replaces the removed @expectedException annotation.
        $this->expectException(Exception::class);

        TokenStream::newFromFile(__DIR__ . "/file_not_found");
    }

    /**
     * Assert that a token matches the given type and value, using Token::eq().
     *
     * @param string $expectedType
     * @param mixed $expectedValue
     * @param Token $token
     */
    public function assertTokenEq($expectedType, $expectedValue, Token $token)
    {
        $this->assertTrue(
            $token->eq($expectedType, $expectedValue),
            "expected {$expectedType}[{$expectedValue}], but got " . $token->toDebugString()
        );
    }

    /**
     * The first token read from $text has the expected type and value.
     *
     * @dataProvider nextTokenProvider
     * @param string $text
     * @param string $expectedType
     * @param mixed $expectedNextTokenValue
     */
    public function testNextToken($text, $expectedType, $expectedNextTokenValue)
    {
        $stream = $this->makeStream($text);
        $token = $stream->nextToken();
        $this->assertTokenEq($expectedType, $expectedNextTokenValue, $token);
    }

    /**
     * Data sets for testNextToken: [ input text, expected type, expected value ].
     *
     * @return array
     */
    public function nextTokenProvider()
    {
        $sq = "'";
        $dq = '"';
        $bq = '`';
        $bs = "\\";

        return [
            [ '', Token::EOF, '' ],

            // numbers
            [ '1', Token::NUMBER, '1' ],
            [ '123', Token::NUMBER, '123' ],
            [ '123.45', Token::NUMBER, '123.45' ],
            [ '.45', Token::NUMBER, '.45' ],
            [ '123.', Token::NUMBER, '123.' ],
            [ '-123', Token::NUMBER, '-123' ],
            [ '+123', Token::NUMBER, '+123' ],
            [ '1E23', Token::NUMBER, '1E23' ],
            [ '1e23', Token::NUMBER, '1e23' ],
            [ '1e+23', Token::NUMBER, '1e+23' ],
            [ '1e-23', Token::NUMBER, '1e-23' ],
            [ '+1.23e-17', Token::NUMBER, '+1.23e-17' ],

            // whitespace
            [ " 1", Token::NUMBER, 1],
            [ "\t1", Token::NUMBER, 1],
            [ "\n1", Token::NUMBER, 1],

            // comments
            [ "/*comment*/1", Token::NUMBER, '1'],
            [ "/**/1", Token::NUMBER, '1'],
            [ "-- comment\n1", Token::NUMBER, '1'],
            [ "--\n1", Token::NUMBER, '1'],
            [ "#comment\n1", Token::NUMBER, '1'],
            [ "#comment", Token::EOF, null],

            // conditional comments
            [ "/*! 12345*/", Token::NUMBER, '12345'],
            [ "/*!12345 45678*/", Token::NUMBER, '45678'],

            // double quoted strings
            [ "{$dq}{$dq}", Token::STRING, ''],
            [ "{$dq}hello world{$dq}", Token::STRING, 'hello world'],
            [ "{$dq}hello{$dq}{$dq}world{$dq}", Token::STRING, "hello{$dq}world"], // "" => "
            [ "{$dq}hello{$bs}{$bs}world{$dq}", Token::STRING, "hello{$bs}world"], // \\ => \
            [ "{$dq}hello{$bs}{$dq}world{$dq}", Token::STRING, "hello{$dq}world"], // \" => "

            // single quoted strings
            [ "{$sq}{$sq}", Token::STRING, ''],
            [ "{$sq}hello{$sq}", Token::STRING, 'hello'],
            [ "{$sq}hello{$sq}{$sq}world{$sq}", Token::STRING, "hello{$sq}world"], // '' => '
            [ "{$sq}hello{$bs}{$bs}world{$sq}", Token::STRING, "hello{$bs}world"], // \\ => \
            [ "{$sq}hello{$bs}{$sq}world{$sq}", Token::STRING, "hello{$sq}world"], // \' => '

            // backquoted identifiers
            [ "{$bq}{$bq}", Token::IDENTIFIER, ''],
            [ "{$bq}hello{$bq}", Token::IDENTIFIER, 'hello'],
            [ "{$bq}hello{$bq}{$bq}world{$bq}", Token::IDENTIFIER, "hello{$bq}world"], // `` => `
            // "{$bs}" rather than "${bs}": the latter interpolation form is deprecated since PHP 8.2.
            [ "{$bq}hello{$bs}{$bs}world{$bq}", Token::IDENTIFIER, "hello{$bs}{$bs}world"], // \\ => \\
            [ "{$bq}hello{$bs}nworld{$bq}", Token::IDENTIFIER, "hello{$bs}nworld"], // \n => \n

            // hex literals
            [ "x''", Token::HEX, "" ],
            [ "x'00'", Token::HEX, "00" ],
            [ "x'0123456789abcdef'", Token::HEX, "0123456789abcdef" ],
            [ "x'0123456789ABCDEF'", Token::HEX, "0123456789ABCDEF" ],
            [ "0x0123456789abcdef", Token::HEX, "0123456789abcdef" ],
            [ "0x0123456789ABCDEF", Token::HEX, "0123456789ABCDEF" ],

            // binary literals
            [ "b''", Token::BIN, "" ],
            [ "b'0'", Token::BIN, "0" ],
            [ "b'00011011'", Token::BIN, "00011011" ],

            // Invalid hex and binary literals - these should probably be failures, tbh.
            [ "x'GGG'", Token::IDENTIFIER, 'x'],
            [ '0Xabc', Token::NUMBER, '0'],
            [ "b'2'", Token::IDENTIFIER, 'b'],

            // unquoted identifiers
            // [ '1_', Token::IDENTIFIER, '1_' ], // TODO - make this pass
            [ '_', Token::IDENTIFIER, '_' ],
            [ '$', Token::IDENTIFIER, '$' ],
            [ 'a', Token::IDENTIFIER, 'a' ],
            [ 'abc', Token::IDENTIFIER, 'abc' ],
            [ 'abc123', Token::IDENTIFIER, 'abc123' ],
            [ '_abc', Token::IDENTIFIER, '_abc' ],
            [ '_123', Token::IDENTIFIER, '_123' ],
            [ '$_123abc', Token::IDENTIFIER, '$_123abc' ],

            // symbols
            [ "<=_", Token::SYMBOL, "<=" ],
            [ ">=_", Token::SYMBOL, ">=" ],
            [ "<>_", Token::SYMBOL, "<>" ],
            [ "!=_", Token::SYMBOL, "!=" ],
            [ ":=_", Token::SYMBOL, ":=" ],
            [ "&&_", Token::SYMBOL, "&&" ],
            [ "||_", Token::SYMBOL, "||" ],
            [ "@@_", Token::SYMBOL, "@@" ],
            [ "@_", Token::SYMBOL, "@" ],
            [ "+_", Token::SYMBOL, "+" ],
            [ "-_", Token::SYMBOL, "-" ],
            [ "*_", Token::SYMBOL, "*" ],
            [ "/_", Token::SYMBOL, "/" ],
            [ "%_", Token::SYMBOL, "%" ],
        ];
    }

    /**
     * nextToken() must throw a LogicException for each of these inputs.
     *
     * @param string $text
     * @dataProvider provideBadLogicNextToken
     */
    public function testBadLogicNextToken($text)
    {
        $stream = $this->makeStream($text);

        // Set the expectation after building the stream so that only
        // nextToken() is allowed to satisfy it.
        $this->expectException(LogicException::class);
        $stream->nextToken();
    }

    /**
     * Inputs for testBadLogicNextToken.
     *
     * @return array
     */
    public function provideBadLogicNextToken()
    {
        return [
            // All of these are explicitly not valid and will result in a "Lexer is confused by ..." message.
            ['?'],
            ['['],
            [']'],
            ['\\'],
            ['{'],
            ['}'],
            // This item covers the fall through value for any characters not explicitly listed
            [chr(0)],
        ];
    }

    /**
     * nextToken() must throw a RuntimeException for each of these inputs.
     *
     * @param string $text
     * @dataProvider provideBadRuntimeNextToken
     */
    public function testBadRuntimeNextToken($text)
    {
        $stream = $this->makeStream($text);

        $this->expectException(RuntimeException::class);
        $stream->nextToken();
    }

    /**
     * Inputs for testBadRuntimeNextToken.
     *
     * @return array
     */
    public function provideBadRuntimeNextToken()
    {
        return [
            // Unterminated quoted identifier
            ['`foo'],
            // Unterminated '/*'
            ['/*'],
            // Unexpected end of comment
            ['*/'],
            // Unterminated string
            ["'foo"],
            // Invalid hex literal (not an even number of digits)
            ['0xaaa'],
        ];
    }

    /**
     * After rewind() to a mark, the stream replays the same tokens that were
     * read immediately after the mark was taken.
     */
    public function testRewind()
    {
        $stream = $this->makeStream("create table t (x int, y int)");
        $stream->nextToken();
        $mark = $stream->getMark();

        // First pass over the two tokens following the mark.
        $firstPassA = $stream->nextToken();
        $firstPassB = $stream->nextToken();

        // Second pass over the same two tokens after rewinding.
        $stream->rewind($mark);
        $secondPassA = $stream->nextToken();
        $secondPassB = $stream->nextToken();

        $this->assertTokenEq($secondPassA->type, $secondPassA->text, $firstPassA);
        $this->assertTokenEq($secondPassB->type, $secondPassB->text, $firstPassB);
    }

    /**
     * consume() reports whether $spec matched, and the token read afterwards
     * is the one named by $type / $value.
     *
     * @dataProvider consumeProvider
     * @param string $text
     * @param mixed $spec
     * @param bool $success
     * @param string $type
     * @param string $value
     */
    public function testConsume($text, $spec, $success, $type, $value)
    {
        $stream = $this->makeStream($text);
        $this->assertSame(
            (bool)$success,
            (bool)$stream->consume($spec),
            "consume did not return " . ($success ? 'true' : 'false')
        );
        $token = $stream->nextToken();
        $this->assertTokenEq($type, $value, $token);
    }

    /**
     * Data sets for testConsume:
     * [ text, spec, expected result, type of next token, value of next token ].
     *
     * @return array
     */
    public function consumeProvider()
    {
        return [
            ['create table t', 'create', true, Token::IDENTIFIER, 'table'],
            ['create table t', 'create table', true, Token::IDENTIFIER, 't'],
            ['create table t', 'drop', false, Token::IDENTIFIER, 'create'],
            ['create table t', 'drop table', false, Token::IDENTIFIER, 'create'],
            ['create table t', 'create database', false, Token::IDENTIFIER, 'create'],
            ['= "test"', [[Token::SYMBOL, '=']], true, Token::STRING, 'test'],
            ['= "test"', [[Token::NUMBER, 1 ]], false, Token::SYMBOL, '='],
            ['();', [[Token::SYMBOL, '('],
                     [Token::SYMBOL, ')']], true, Token::SYMBOL, ';'],
        ];
    }

    /**
     * peek() reports whether $spec matched, and the token read afterwards is
     * the one named by $type / $value.
     *
     * @dataProvider peekProvider
     * @param string $text
     * @param mixed $spec
     * @param bool $success
     * @param string $type
     * @param string $value
     */
    public function testPeek($text, $spec, $success, $type, $value)
    {
        $stream = $this->makeStream($text);
        $this->assertSame(
            (bool)$success,
            (bool)$stream->peek($spec),
            "peek did not return " . ($success ? 'true' : 'false')
        );
        $token = $stream->nextToken();
        $this->assertTokenEq($type, $value, $token);
    }

    /**
     * Data sets for testPeek:
     * [ text, spec, expected result, type of next token, value of next token ].
     * In every case the next token is the first token of the text.
     *
     * @return array
     */
    public function peekProvider()
    {
        return [
            ['create table t', 'create', true, Token::IDENTIFIER, 'create'],
            ['create table t', 'create table', true, Token::IDENTIFIER, 'create'],
            ['create table t', 'drop', false, Token::IDENTIFIER, 'create'],
            ['create table t', 'drop table', false, Token::IDENTIFIER, 'create'],
            ['create table t', 'create database', false, Token::IDENTIFIER, 'create'],
            ['= "test"', [[Token::SYMBOL, '=']], true, Token::SYMBOL, '='],
            ['();', [[Token::SYMBOL, '('],
                     [Token::SYMBOL, ')']], true, Token::SYMBOL, '('],
        ];
    }

    /**
     * expect() does not throw when the next token matches.
     */
    public function testExpectSucc()
    {
        $stream = $this->makeStream('create table t');
        $stream->expect(Token::IDENTIFIER, 'create');

        // A mismatch is signalled by an exception (see testExpectFail), so
        // reaching this line is the success condition. Record it explicitly,
        // otherwise PHPUnit reports the test as risky (no assertions).
        $this->addToAssertionCount(1);
    }

    /**
     * expect() throws when the next token does not match.
     */
    public function testExpectFail()
    {
        $stream = $this->makeStream('create table t');

        $this->expectException(Exception::class);
        $stream->expect(Token::IDENTIFIER, 'drop');
    }

    /**
     * Each expect*() helper either returns the expected value or throws a
     * RuntimeException, depending on the token at the head of the stream.
     *
     * @param string $func            name of the TokenStream method to call
     * @param string $token           text to tokenise
     * @param mixed $expected         expected return value (unused when throwing)
     * @param bool $throwsException   whether a RuntimeException is expected
     * @dataProvider provideExpectedTokenType
     */
    public function testExpectedTokenType($func, $token, $expected, $throwsException)
    {
        $stream = $this->makeStream($token);

        if ($throwsException) {
            $this->expectException(RuntimeException::class);
            $stream->$func();
            return;
        }

        $this->assertEquals($expected, $stream->$func());
    }

    /**
     * Data sets for testExpectedTokenType.
     *
     * @return array
     */
    public function provideExpectedTokenType()
    {
        return [
            // [ function name, token, expected value, should it throw a RuntimeException? ]

            [ 'expectCloseParen', ')', ')', false],
            [ 'expectCloseParen', 'a', null, true],

            [ 'expectOpenParen', '(', '(', false],
            [ 'expectOpenParen', 'a', null, true],

            [ 'expectName', 'foo', 'foo', false],
            [ 'expectName', '1', null, true],

            [ 'expectNumber', '1', 1, false],
            [ 'expectNumber', 'a', null, true],

            // An embedded string
            [ 'expectString', "'a'", "a", false],
            [ 'expectString', 'a', null, true],

            [ 'expectStringExtended', "'a'", "a", false],
            [ 'expectStringExtended', "x'68656c6c6f21'", 'hello!', false],
            [ 'expectStringExtended', "X'68656c6c6f21'", 'hello!', false],
            [ 'expectStringExtended', '0x68656c6c6f21', 'hello!', false],
            [ 'expectStringExtended', "b'0111111000100011'", '~#', false],
            [ 'expectStringExtended', 'a', null, true],

        ];
    }

    /**
     * The token types produced from a conditional comment match the expected
     * list exactly, in order and in number.
     *
     * @param string $conditionalComment
     * @param array $tokenTypes
     * @dataProvider conditionalCommentProvider
     */
    public function testConditionalComment($conditionalComment, array $tokenTypes)
    {
        $stream = $this->makeStream($conditionalComment);

        // Collect every token type up to EOF and compare the whole list.
        // Asserting only inside the loop would let a stream that yields too
        // few tokens (or none at all) pass without a single failed assertion.
        $actualTypes = [];
        for ($token = $stream->nextToken(); $token->type != Token::EOF; $token = $stream->nextToken()) {
            $actualTypes[] = $token->type;
        }

        $this->assertEquals(array_values($tokenTypes), $actualTypes);
    }

    /**
     * Data sets for testConditionalComment.
     *
     * @return array
     */
    public function conditionalCommentProvider()
    {
        return [
            // [ conditional comment, list of expected token types ]
            [
                '/*! abcde fghij */',
                [
                    Token::IDENTIFIER,
                    Token::IDENTIFIER,
                ],
            ],
            [
                '/*!12345 fghij */',
                [
                    Token::IDENTIFIER,
                ],
            ],
        ];
    }

    /**
     * contextualise() prefixes the message with the stream path and line
     * number, and appends the source lines with a <<HERE>> marker at the
     * current offset.
     *
     * @throws ReflectionException
     */
    public function testContextualise()
    {
        // The four-space indent on the second line is part of the fixture: it
        // is encoded in the base64 payload of the expected path below, and the
        // offset of 32 used further down only lands after "bar" with it.
        $sql = <<<EOF
CREATE TABLE `foo` (
    `a` bar DEFAULT NULL
);
EOF;
        // NOTE(review): the indent on line 2 below is assumed to mirror the
        // SQL line verbatim - confirm against contextualise() output.
        $expected = <<<EOF
data://text/plain;base64,Q1JFQVRFIFRBQkxFIGBmb29gICgKICAgIGBhYCBiYXIgREVGQVVMVCBOVUxMCik7, line 2: unknown datatype 'bar'
1: CREATE TABLE `foo` (
2:     `a` bar<<HERE>> DEFAULT NULL
EOF;

        $stream = $this->makeStream($sql);

        // Use reflection to set the internal offset to the place where error is.
        $reflection = new ReflectionClass($stream);
        $property = $reflection->getProperty('offset');
        $property->setAccessible(true);
        $property->setValue($stream, 32);

        $message = $stream->contextualise("unknown datatype 'bar'");
        $this->assertEquals($expected, $message);
    }
}
455
|
|
|
|