1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/**
 * RegExp token matcher.
 *
 * Auto-generated file, please don't edit manually.
 * Generated by UniLex.
 */
9
|
|
|
|
10
|
|
|
declare(strict_types=1); |
11
|
|
|
|
12
|
|
|
namespace Remorhaz\UniLex\RegExp\Grammar; |
13
|
|
|
|
14
|
|
|
use Remorhaz\UniLex\IO\CharBufferInterface; |
15
|
|
|
use Remorhaz\UniLex\Lexer\TokenFactoryInterface; |
16
|
|
|
use Remorhaz\UniLex\Lexer\TokenMatcherTemplate; |
17
|
|
|
|
18
|
|
|
class TokenMatcher extends TokenMatcherTemplate
{
    /**
     * Consumes one symbol from the buffer and emits a single token describing it.
     *
     * The symbol code is tested against a fixed list of code ranges; the first
     * matching rule decides the token type. Every emitted token receives the
     * TokenAttribute::CODE attribute holding the symbol code. Tokens for the
     * characters 0-9, A-F and a-f additionally receive TokenAttribute::DIGIT
     * holding the character itself (chr() of the code).
     *
     * A symbol that matches no rule is still consumed and reported as
     * TokenType::INVALID.
     *
     * @param CharBufferInterface   $buffer       Input the symbol is read from.
     * @param TokenFactoryInterface $tokenFactory Passed to createContext() together with the buffer.
     *
     * @return bool False when the buffer is already at its end (nothing is consumed and
     *              no token is set); true when a symbol was consumed and a token was set.
     */
    public function match(CharBufferInterface $buffer, TokenFactoryInterface $tokenFactory): bool
    {
        $context = $this->createContext($buffer, $tokenFactory);

        if (!$context->getBuffer()->isEnd()) {
            $symbol = $context->getBuffer()->getSymbol();

            // Each rule below only selects the token type (and whether the DIGIT
            // attribute is wanted); the token itself is emitted once, after the chain.
            $isRecognized = true;
            $hasDigitValue = false;
            $tokenType = null;

            if (0x00 <= $symbol && $symbol <= 0x1F) {
                // [\u0000-\u001F]
                $tokenType = TokenType::CTL_ASCII;
            } elseif (
                (0x20 <= $symbol && $symbol <= 0x23)
                || (0x25 <= $symbol && $symbol <= 0x27)
                || 0x2F == $symbol
                || (0x3A <= $symbol && $symbol <= 0x3E)
                || 0x40 == $symbol
                || 0x5F == $symbol
                || 0x60 == $symbol
                || 0x7E == $symbol
            ) {
                // [ -#%-'/:->@_~`]
                $tokenType = TokenType::PRINTABLE_ASCII_OTHER;
            } elseif (0x24 == $symbol) {
                // \$
                $tokenType = TokenType::DOLLAR;
            } elseif (0x28 == $symbol) {
                // \(
                $tokenType = TokenType::LEFT_BRACKET;
            } elseif (0x29 == $symbol) {
                // \)
                $tokenType = TokenType::RIGHT_BRACKET;
            } elseif (0x2A == $symbol) {
                // \u002A
                $tokenType = TokenType::STAR;
            } elseif (0x2B == $symbol) {
                // \+
                $tokenType = TokenType::PLUS;
            } elseif (0x2C == $symbol) {
                // ,
                $tokenType = TokenType::COMMA;
            } elseif (0x2D == $symbol) {
                // -
                $tokenType = TokenType::HYPHEN;
            } elseif (0x2E == $symbol) {
                // \.
                $tokenType = TokenType::DOT;
            } elseif (0x30 == $symbol) {
                // 0
                $tokenType = TokenType::DIGIT_ZERO;
                $hasDigitValue = true;
            } elseif (0x31 <= $symbol && $symbol <= 0x37) {
                // [1-7]
                $tokenType = TokenType::DIGIT_OCT;
                $hasDigitValue = true;
            } elseif (0x38 == $symbol || 0x39 == $symbol) {
                // [8-9]
                $tokenType = TokenType::DIGIT_DEC;
                $hasDigitValue = true;
            } elseif (0x3F == $symbol) {
                // \?
                $tokenType = TokenType::QUESTION;
            } elseif (
                (0x41 <= $symbol && $symbol <= 0x46)
                || 0x61 == $symbol
                || 0x62 == $symbol
                || (0x64 <= $symbol && $symbol <= 0x66)
            ) {
                // [A-Fa-bd-f]
                $tokenType = TokenType::OTHER_HEX_LETTER;
                $hasDigitValue = true;
            } elseif (
                (0x47 <= $symbol && $symbol <= 0x4F)
                || (0x51 <= $symbol && $symbol <= 0x5A)
                || (0x67 <= $symbol && $symbol <= 0x6E)
                || (0x71 <= $symbol && $symbol <= 0x74)
                || 0x76 == $symbol
                || 0x77 == $symbol
                || 0x79 == $symbol
                || 0x7A == $symbol
            ) {
                // [G-OQ-Zg-nq-tvwyz]
                $tokenType = TokenType::OTHER_ASCII_LETTER;
            } elseif (0x50 == $symbol) {
                // P
                $tokenType = TokenType::CAPITAL_P;
            } elseif (0x5B == $symbol) {
                // \[
                $tokenType = TokenType::LEFT_SQUARE_BRACKET;
            } elseif (0x5C == $symbol) {
                // \\
                $tokenType = TokenType::BACKSLASH;
            } elseif (0x5D == $symbol) {
                // ]
                $tokenType = TokenType::RIGHT_SQUARE_BRACKET;
            } elseif (0x5E == $symbol) {
                // \^
                $tokenType = TokenType::CIRCUMFLEX;
            } elseif (0x63 == $symbol) {
                // c
                $tokenType = TokenType::SMALL_C;
                $hasDigitValue = true;
            } elseif (0x6F == $symbol) {
                // o
                $tokenType = TokenType::SMALL_O;
            } elseif (0x70 == $symbol) {
                // p
                $tokenType = TokenType::SMALL_P;
            } elseif (0x75 == $symbol) {
                // u
                $tokenType = TokenType::SMALL_U;
            } elseif (0x78 == $symbol) {
                // x
                $tokenType = TokenType::SMALL_X;
            } elseif (0x7B == $symbol) {
                // \u007B
                $tokenType = TokenType::LEFT_CURLY_BRACKET;
            } elseif (0x7C == $symbol) {
                // \|
                $tokenType = TokenType::VERTICAL_LINE;
            } elseif (0x7D == $symbol) {
                // }
                $tokenType = TokenType::RIGHT_CURLY_BRACKET;
            } elseif (0x7F == $symbol) {
                // \u007F
                $tokenType = TokenType::OTHER_ASCII;
            } elseif (0x80 <= $symbol && $symbol <= 0x10FFFF) {
                // [\u0080-\x{10FFFF}]
                $tokenType = TokenType::NOT_ASCII;
            } else {
                // No rule applies; handled by the fallback below.
                $isRecognized = false;
            }

            if ($isRecognized) {
                // Consume the symbol first, then describe it.
                $context->getBuffer()->nextSymbol();
                $attributeTarget = $context
                    ->setNewToken($tokenType)
                    ->setTokenAttribute(TokenAttribute::CODE, $symbol);
                if ($hasDigitValue) {
                    $attributeTarget->setTokenAttribute(TokenAttribute::DIGIT, chr($symbol));
                }

                return true;
            }
        }

        // Fallback reached either with an exhausted buffer or with a symbol that
        // matched none of the rules above. The end-of-buffer check and the symbol
        // read are deliberately repeated here so the sequence of buffer calls stays
        // the same as in the generated state machine.
        if ($context->getBuffer()->isEnd()) {
            return false;
        }

        $symbol = $context->getBuffer()->getSymbol();
        $context->getBuffer()->nextSymbol();
        $context
            ->setNewToken(TokenType::INVALID)
            ->setTokenAttribute(TokenAttribute::CODE, $symbol);

        return true;
    }
}
346
|
|
|
|