|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace PhpParser; |
|
4
|
|
|
|
|
5
|
|
|
use PhpParser\Parser\Tokens; |
|
6
|
|
|
|
|
7
|
|
|
class Lexer |
|
8
|
|
|
{ |
|
9
|
|
|
/** @var string Source code handed to startLexing(); read again by handleHaltCompiler() */
protected $code;
/** @var array Token array produced by token_get_all() for the current code */
protected $tokens;
/** @var int Index into $tokens of the most recently fetched token (-1 before the first fetch) */
protected $pos;
/** @var int Line number reached after the tokens consumed so far (starts at 1) */
protected $line;
/** @var int Byte offset into $code just past the tokens consumed so far (starts at 0) */
protected $filePos;

/** @var array Map from PHP's internal token ids to the ids used by the parser */
protected $tokenMap;
/** @var array Token ids skipped by getNextToken(); only the keys matter (isset lookup) */
protected $dropTokens;

/** @var array Attribute names to populate, stored as keys; the values are a dummy true */
protected $usedAttributes;
|
19
|
|
|
|
|
20
|
|
|
/**
 * Creates a Lexer.
 *
 * @param array $options Options array. The only option read here is 'usedAttributes': a list
 *                       of attribute names that getNextToken() should fill in. Supported
 *                       names are 'comments', 'startLine', 'endLine', 'startTokenPos',
 *                       'endTokenPos', 'startFilePos' and 'endFilePos'. When the option is
 *                       absent, the first three are used. See getNextToken() for details.
 */
public function __construct(array $options = array()) {
    // Translation table from PHP's own token ids to the parser's token ids.
    $this->tokenMap = $this->createTokenMap();

    // Tokens that getNextToken() skips. Only key presence is tested (via isset),
    // so the value 1 is a mere placeholder and is never read.
    $this->dropTokens = array(
        T_WHITESPACE => 1,
        T_OPEN_TAG   => 1,
    );

    // Fall back to the default attribute list only when the caller supplied no
    // 'usedAttributes' key at all.
    if (array_key_exists('usedAttributes', $options)) {
        $attributeNames = $options['usedAttributes'];
    } else {
        $attributeNames = array('comments', 'startLine', 'endLine');
    }

    // Turn the list of names into a name => true map for isset() lookups.
    $this->usedAttributes = array_fill_keys($attributeNames, true);
}
|
44
|
|
|
|
|
45
|
|
|
/**
 * Initializes the lexer for lexing the provided source code.
 *
 * Tokenizes $code with token_get_all(), converts any recorded tokenizer error into an
 * exception, and resets the token position, line counter and file offset.
 *
 * @param string $code The source code to lex
 *
 * @throws Error on lexing errors (unterminated comment or unexpected character)
 */
public function startLexing($code) {
    // Turn xdebug.scream off for the duration of the tokenizer call so that the @
    // suppression below stays effective. ini_set() returns the previous value, or
    // false when the setting could not be changed (e.g. the setting does not exist).
    $scream = ini_set('xdebug.scream', '0');

    $this->resetErrors();
    $this->tokens = @token_get_all($code);

    // Restore the setting BEFORE inspecting the error state: handleErrors() may throw,
    // and restoring only afterwards would leave xdebug.scream switched off for the rest
    // of the process whenever the code fails to lex.
    if (false !== $scream) {
        ini_set('xdebug.scream', $scream);
    }

    $this->handleErrors();

    $this->code = $code; // keep the code around for __halt_compiler() handling
    $this->pos = -1;     // getNextToken() pre-increments, so the first token is index 0
    $this->line = 1;
    $this->filePos = 0;
}
|
68
|
|
|
|
|
69
|
|
|
/**
 * Puts error_get_last() into a known state before the tokenizer runs.
 *
 * Uses error_clear_last() where that function exists. Otherwise a harmless
 * undefined-variable error is provoked on purpose, so that whatever error_get_last()
 * reported before is replaced by a message handleErrors() does not react to.
 */
protected function resetErrors() {
    if (function_exists('error_clear_last')) {
        error_clear_last();
        return;
    }

    // Fallback: install a handler registered for no error types (mask 0), trigger the
    // undefined-variable error under @, then put the previous handler back.
    set_error_handler(function() { return false; }, 0);
    @$undefinedVariable;
    restore_error_handler();
}
|
79
|
|
|
|
|
80
|
|
|
/**
 * Converts a tokenizer error reported by error_get_last() into an Error exception.
 *
 * Returns without doing anything when no error is recorded, or when the recorded
 * message matches none of the patterns checked below.
 *
 * @throws Error for an unterminated comment, an unexpected character or a null byte
 */
protected function handleErrors() {
    $lastError = error_get_last();
    if (null === $lastError) {
        return;
    }

    $message = $lastError['message'];

    // Unterminated comment: the message carries the line on which the comment started.
    if (1 === preg_match('~^Unterminated comment starting line ([0-9]+)$~', $message, $found)) {
        throw new Error('Unterminated comment', (int) $found[1]);
    }

    // Unexpected character: the message carries the character and its ASCII code.
    if (1 === preg_match(
        '~^Unexpected character in input: \'(.)\' \(ASCII=([0-9]+)\)~s', $message, $found
    )) {
        $description = sprintf('Unexpected character "%s" (ASCII %d)', $found[1], $found[2]);
        throw new Error($description);
    }

    // PHP cuts the error message after a null byte, so that case shows up as a
    // message ending right after the opening quote and needs its own check.
    if (1 === preg_match('~^Unexpected character in input: \'$~', $message)) {
        throw new Error('Unexpected null byte');
    }
}
|
108
|
|
|
|
|
109
|
|
|
/**
 * Fetches the next token.
 *
 * Which attributes are filled in is controlled by the 'usedAttributes' constructor
 * option. The supported attributes are:
 *
 *  * 'comments'      => Array of PhpParser\Comment or PhpParser\Comment\Doc instances,
 *                       representing all comments that occurred between the previous
 *                       non-discarded token and the current one.
 *  * 'startLine'     => Line in which the node starts.
 *  * 'endLine'       => Line in which the node ends.
 *  * 'startTokenPos' => Offset into the token array of the first token in the node.
 *  * 'endTokenPos'   => Offset into the token array of the last token in the node.
 *  * 'startFilePos'  => Offset into the code string of the first character that is part of the node.
 *  * 'endFilePos'    => Offset into the code string of the last character that is part of the node.
 *
 * Comments and the tokens listed in $dropTokens are consumed but never returned. Once
 * the token array is exhausted, an EOF token with id 0 is returned.
 *
 * @param mixed $value           Variable to store token content in
 * @param mixed $startAttributes Variable to store start attributes in
 * @param mixed $endAttributes   Variable to store end attributes in
 *
 * @return int Token id
 */
public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
    $startAttributes = array();
    $endAttributes = array();

    for (;;) {
        ++$this->pos;

        // Past the last token: synthesize the EOF token "\0", whose id (ord) is 0.
        $current = isset($this->tokens[$this->pos]) ? $this->tokens[$this->pos] : "\0";

        // Rewritten on every pass, so after skipped tokens these describe the token
        // that is finally returned.
        if (isset($this->usedAttributes['startTokenPos'])) {
            $startAttributes['startTokenPos'] = $this->pos;
        }
        if (isset($this->usedAttributes['startFilePos'])) {
            $startAttributes['startFilePos'] = $this->filePos;
        }

        $isCharToken = is_string($current);

        if ($isCharToken) {
            // Plain string token. Due to a bug in token_get_all() the two-character
            // string 'b"' can show up here; it is reported with the id of '"'.
            $isBinaryQuote = 'b"' === $current;

            $value = $current;
            $this->filePos += $isBinaryQuote ? 2 : 1;
            $id = $isBinaryQuote ? ord('"') : ord($current);
        } else {
            // Array token: [0] => token id, [1] => text, [2] => starting line.
            $type = $current[0];
            $text = $current[1];

            $this->line += substr_count($text, "\n");
            $this->filePos += strlen($text);

            // Comments are collected as an attribute of the next returned token.
            if (T_COMMENT === $type || T_DOC_COMMENT === $type) {
                if (isset($this->usedAttributes['comments'])) {
                    $startAttributes['comments'][] = T_DOC_COMMENT === $type
                        ? new Comment\Doc($text, $current[2])
                        : new Comment($text, $current[2]);
                }
                continue;
            }

            // Dropped tokens still advance line/filePos above but are not returned.
            if (isset($this->dropTokens[$type])) {
                continue;
            }

            $value = $text;
        }

        // String tokens carry no line info, so the running line counter is used for
        // them; array tokens report their own starting line.
        if (isset($this->usedAttributes['startLine'])) {
            $startAttributes['startLine'] = $isCharToken ? $this->line : $current[2];
        }
        if (isset($this->usedAttributes['endLine'])) {
            $endAttributes['endLine'] = $this->line;
        }
        if (isset($this->usedAttributes['endTokenPos'])) {
            $endAttributes['endTokenPos'] = $this->pos;
        }
        if (isset($this->usedAttributes['endFilePos'])) {
            // filePos already points past the token, hence the -1.
            $endAttributes['endFilePos'] = $this->filePos - 1;
        }

        return $isCharToken ? $id : $this->tokenMap[$type];
    }

    throw new \RuntimeException('Reached end of lexer loop');
}
|
211
|
|
|
|
|
212
|
|
|
/**
 * Returns the token array for the current code.
 *
 * The array has the format produced by token_get_all() and is returned unfiltered,
 * i.e. whitespace and comments are still included. The token position attributes
 * ('startTokenPos', 'endTokenPos') are offsets into this array.
 *
 * @return array Array of tokens in token_get_all() format
 */
public function getTokens() {
    return $this->tokens;
}
|
225
|
|
|
|
|
226
|
|
|
/**
 * Handles __halt_compiler() by returning the text after it.
 *
 * Also moves the token position to the end of the token array, so that subsequent
 * getNextToken() calls yield no further tokens from the code.
 *
 * @return string Remaining text
 *
 * @throws Error if the text at the current offset does not start with "();" (or "()"
 *               followed by a closing tag), optionally separated by whitespace
 */
public function handleHaltCompiler() {
    // Text after T_HALT_COMPILER; at this point it still begins with the "();" part.
    $remainder = substr($this->code, $this->filePos);

    // Require "();" right away. Only whitespace is tolerated between the pieces, which
    // keeps things simple by not allowing comments in between the tokens.
    $found = preg_match('~^\s*\(\s*\)\s*(?:;|\?>\r?\n?)~', $remainder, $terminator);
    if (1 !== $found) {
        throw new Error('__HALT_COMPILER must be followed by "();"');
    }

    // Prevent the lexer from returning any further tokens.
    $this->pos = count($this->tokens);

    // Strip the matched "();" prefix; the cast turns a false from substr() into ''.
    $data = substr($remainder, strlen($terminator[0]));
    return (string) $data;
}
|
248
|
|
|
|
|
249
|
|
|
/**
 * Creates the token map.
 *
 * The map translates PHP's internal token identifiers into the identifiers used by
 * the parser. Besides the name-based one-to-one mappings, T_OPEN_TAG_WITH_ECHO is
 * mapped to T_ECHO and T_CLOSE_TAG to ';'.
 *
 * @return array The token map
 */
protected function createTokenMap() {
    $map = array();

    // 256 is the minimum possible token number, as everything below it is an
    // ASCII value.
    for ($id = 256; $id < 1000; ++$id) {
        if (T_DOUBLE_COLON === $id) {
            // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM
            $map[$id] = Tokens::T_PAAMAYIM_NEKUDOTAYIM;
            continue;
        }

        if (T_OPEN_TAG_WITH_ECHO === $id) {
            // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
            $map[$id] = Tokens::T_ECHO;
            continue;
        }

        if (T_CLOSE_TAG === $id) {
            // T_CLOSE_TAG is equivalent to ';'
            $map[$id] = ord(';');
            continue;
        }

        $phpName = token_name($id);
        if ('UNKNOWN' === $phpName) {
            // Number not assigned to any token.
            continue;
        }

        if ('T_HASHBANG' === $phpName) {
            // HHVM uses a special token for #! hashbang lines
            $map[$id] = Tokens::T_INLINE_HTML;
            continue;
        }

        // Other tokens can be mapped directly, provided the parser defines a constant
        // of the same name; tokens without one stay unmapped.
        $parserConstant = 'PhpParser\Parser\Tokens::' . $phpName;
        if (defined($parserConstant)) {
            $map[$id] = constant($parserConstant);
        }
    }

    // HHVM uses a special token for numbers that overflow to double
    if (defined('T_ONUMBER')) {
        $map[T_ONUMBER] = Tokens::T_DNUMBER;
    }

    // HHVM also has a separate token for the __COMPILER_HALT_OFFSET__ constant
    if (defined('T_COMPILER_HALT_OFFSET')) {
        $map[T_COMPILER_HALT_OFFSET] = Tokens::T_STRING;
    }

    return $map;
}
|
295
|
|
|
} |
|
296
|
|
|
|