<?php

namespace PhpParser;

use PhpParser\Parser\Tokens;

/**
 * Wraps PHP's token_get_all() and hands the resulting tokens to the parser one at a time.
 *
 * Whitespace and open tags are skipped, comments are collected into the 'comments'
 * start attribute, and the remaining tokens are translated to the parser's own token
 * ids. Position attributes (line, token offset, file offset) are produced on request
 * via the 'usedAttributes' option.
 */
class Lexer
{
    /** @var string Code passed to startLexing(); kept around for __halt_compiler() handling */
    protected $code;
    /** @var array Tokens in token_get_all() format */
    protected $tokens;
    /** @var int Offset into $tokens of the token handled last (-1 before the first token) */
    protected $pos;
    /** @var int Line number reached so far (starts at 1) */
    protected $line;
    /** @var int Offset into $code just past the token handled last (starts at 0) */
    protected $filePos;

    /** @var array Map from internal token ids to PhpParser token ids */
    protected $tokenMap;
    /** @var array Map of token ids to drop while lexing (only the keys are used) */
    protected $dropTokens;

    /** @var array Map of the used attribute names to a dummy value (true) */
    protected $usedAttributes;

    /**
     * Creates a Lexer.
     *
     * @param array $options Options array. Currently only the 'usedAttributes' option is supported,
     *                       which is an array of attributes to add to the AST nodes. Possible attributes
     *                       are: 'comments', 'startLine', 'endLine', 'startTokenPos', 'endTokenPos',
     *                       'startFilePos', 'endFilePos'. The option defaults to the first three.
     *                       For more info see getNextToken() docs.
     */
    public function __construct(array $options = array()) {
        // map from internal tokens to PhpParser tokens
        $this->tokenMap = $this->createTokenMap();

        // map of tokens to drop while lexing (the map is only used for isset lookup,
        // that's why the value is simply set to 1; the value is never actually used.)
        $this->dropTokens = array_fill_keys(array(T_WHITESPACE, T_OPEN_TAG), 1);

        // the usedAttributes member is a map of the used attribute names to a dummy
        // value (here "true"); the array union only fills in the default when the
        // caller did not pass a 'usedAttributes' key
        $options += array(
            'usedAttributes' => array('comments', 'startLine', 'endLine'),
        );
        $this->usedAttributes = array_fill_keys($options['usedAttributes'], true);
    }

    /**
     * Initializes the lexer for lexing the provided source code.
     *
     * Tokenizes the code and resets the token position, line and file offset so that
     * the next getNextToken() call starts from the first token.
     *
     * @param string $code The source code to lex
     *
     * @throws Error on lexing errors (unterminated comment or unexpected character)
     */
    public function startLexing($code) {
        // xdebug.scream would make the @-suppressed tokenizer warnings visible, so it
        // is switched off for the duration of the token_get_all() call. ini_set()
        // yields the previous value, or false if the setting could not be changed.
        $scream = ini_set('xdebug.scream', '0');

        $this->resetErrors();
        $this->tokens = @token_get_all($code);

        // Restore the setting before inspecting the tokenizer errors: handleErrors()
        // throws on lexing errors, and the original value must not stay overwritten
        // in that case. handleErrors() only reads error_get_last(), which is not
        // affected by the restore.
        if (false !== $scream) {
            ini_set('xdebug.scream', $scream);
        }

        $this->handleErrors();

        $this->code = $code; // keep the code around for __halt_compiler() handling
        $this->pos  = -1;
        $this->line =  1;
        $this->filePos = 0;
    }

    /**
     * Brings error_get_last() into a known state before tokenizing, so that
     * handleErrors() does not pick up an error raised by unrelated earlier code.
     */
    protected function resetErrors() {
        if (function_exists('error_clear_last')) {
            error_clear_last();
        } else {
            // set error_get_last() to defined state by forcing an undefined variable error
            set_error_handler(function() { return false; }, 0);
            @$undefinedVariable;
            restore_error_handler();
        }
    }

    /**
     * Converts the tokenizer error reported by error_get_last(), if any, into an exception.
     *
     * Messages that match none of the known tokenizer error formats are ignored.
     *
     * @throws Error on an unterminated comment, an unexpected character or a null byte
     */
    protected function handleErrors() {
        $error = error_get_last();
        if (null === $error) {
            return;
        }

        if (preg_match(
            '~^Unterminated comment starting line ([0-9]+)$~',
            $error['message'], $matches
        )) {
            // the captured number is the line the comment starts on
            throw new Error('Unterminated comment', (int) $matches[1]);
        }

        if (preg_match(
            '~^Unexpected character in input:  \'(.)\' \(ASCII=([0-9]+)\)~s',
            $error['message'], $matches
        )) {
            throw new Error(sprintf(
                'Unexpected character "%s" (ASCII %d)',
                $matches[1], $matches[2]
            ));
        }

        // PHP cuts error message after null byte, so need special case
        if (preg_match('~^Unexpected character in input:  \'$~', $error['message'])) {
            throw new Error('Unexpected null byte');
        }
    }

    /**
     * Fetches the next token.
     *
     * The available attributes are determined by the 'usedAttributes' option, which can
     * be specified in the constructor. The following attributes are supported:
     *
     *  * 'comments'      => Array of PhpParser\Comment or PhpParser\Comment\Doc instances,
     *                       representing all comments that occurred between the previous
     *                       non-discarded token and the current one.
     *  * 'startLine'     => Line in which the node starts.
     *  * 'endLine'       => Line in which the node ends.
     *  * 'startTokenPos' => Offset into the token array of the first token in the node.
     *  * 'endTokenPos'   => Offset into the token array of the last token in the node.
     *  * 'startFilePos'  => Offset into the code string of the first character that is part of the node.
     *  * 'endFilePos'    => Offset into the code string of the last character that is part of the node.
     *
     * Once the token array is exhausted, the EOF token (id 0) is returned.
     *
     * @param mixed $value           Variable to store token content in
     * @param mixed $startAttributes Variable to store start attributes in
     * @param mixed $endAttributes   Variable to store end attributes in
     *
     * @return int Token id
     */
    public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
        $startAttributes = array();
        $endAttributes   = array();

        // Loop until a token is found that is neither a comment nor a dropped token
        while (1) {
            if (isset($this->tokens[++$this->pos])) {
                $token = $this->tokens[$this->pos];
            } else {
                // EOF token with ID 0
                $token = "\0";
            }

            // The start positions are overwritten on every iteration, so on return
            // they describe the returned token and not a skipped one before it
            if (isset($this->usedAttributes['startTokenPos'])) {
                $startAttributes['startTokenPos'] = $this->pos;
            }
            if (isset($this->usedAttributes['startFilePos'])) {
                $startAttributes['startFilePos'] = $this->filePos;
            }

            if (is_string($token)) {
                // String tokens carry no id and no line number: the id is the
                // character code and the line is taken from the running counter

                // bug in token_get_all: the opening quote of a b"..." string is
                // reported as the two-character string 'b"'; it is returned as a '"'
                // token while the file position still advances by both characters
                if ('b"' === $token) {
                    $value = 'b"';
                    $this->filePos += 2;
                    $id = ord('"');
                } else {
                    $value = $token;
                    $this->filePos += 1;
                    $id = ord($token);
                }

                if (isset($this->usedAttributes['startLine'])) {
                    $startAttributes['startLine'] = $this->line;
                }
                if (isset($this->usedAttributes['endLine'])) {
                    $endAttributes['endLine'] = $this->line;
                }
                if (isset($this->usedAttributes['endTokenPos'])) {
                    $endAttributes['endTokenPos'] = $this->pos;
                }
                if (isset($this->usedAttributes['endFilePos'])) {
                    // filePos already points past the token, hence the -1
                    $endAttributes['endFilePos'] = $this->filePos - 1;
                }

                return $id;
            } else {
                // Array token: [0] is the id, [1] the text, [2] the starting line.
                // Line and file position are advanced for dropped tokens as well.
                $this->line += substr_count($token[1], "\n");
                $this->filePos += strlen($token[1]);

                if (T_COMMENT === $token[0]) {
                    if (isset($this->usedAttributes['comments'])) {
                        $startAttributes['comments'][] = new Comment($token[1], $token[2]);
                    }
                } elseif (T_DOC_COMMENT === $token[0]) {
                    if (isset($this->usedAttributes['comments'])) {
                        $startAttributes['comments'][] = new Comment\Doc($token[1], $token[2]);
                    }
                } elseif (!isset($this->dropTokens[$token[0]])) {
                    $value = $token[1];

                    if (isset($this->usedAttributes['startLine'])) {
                        $startAttributes['startLine'] = $token[2];
                    }
                    if (isset($this->usedAttributes['endLine'])) {
                        // the line counter already includes the newlines inside the token
                        $endAttributes['endLine'] = $this->line;
                    }
                    if (isset($this->usedAttributes['endTokenPos'])) {
                        $endAttributes['endTokenPos'] = $this->pos;
                    }
                    if (isset($this->usedAttributes['endFilePos'])) {
                        $endAttributes['endFilePos'] = $this->filePos - 1;
                    }

                    return $this->tokenMap[$token[0]];
                }
            }
        }

        // The loop above only exits by returning; this is a guard against future changes
        throw new \RuntimeException('Reached end of lexer loop');
    }

    /**
     * Returns the token array for current code.
     *
     * The token array is in the same format as provided by the
     * token_get_all() function and does not discard tokens (i.e.
     * whitespace and comments are included). The token position
     * attributes are against this token array.
     *
     * @return array Array of tokens in token_get_all() format
     */
    public function getTokens() {
        return $this->tokens;
    }

    /**
     * Handles __halt_compiler() by returning the text after it.
     *
     * Expects the file position to be directly behind the __halt_compiler token.
     * Afterwards getNextToken() only returns the EOF token.
     *
     * @return string Remaining text
     *
     * @throws Error if the token is not followed by "()" and a semicolon or closing tag
     */
    public function handleHaltCompiler() {
        // text after T_HALT_COMPILER, still including ();
        $textAfter = substr($this->code, $this->filePos);

        // ensure that it is followed by ();
        // this simplifies the situation, by not allowing any comments
        // in between of the tokens.
        if (!preg_match('~^\s*\(\s*\)\s*(?:;|\?>\r?\n?)~', $textAfter, $matches)) {
            throw new Error('__HALT_COMPILER must be followed by "();"');
        }

        // prevent the lexer from returning any further tokens
        $this->pos = count($this->tokens);

        // return with (); removed
        return (string) substr($textAfter, strlen($matches[0])); // (string) converts false to ''
    }

    /**
     * Creates the token map.
     *
     * The token map maps the PHP internal token identifiers
     * to the identifiers used by the Parser. Additionally it
     * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'.
     *
     * Internal tokens without a constant of the same name on the Tokens class
     * get no entry in the map.
     *
     * @return array The token map
     */
    protected function createTokenMap() {
        $tokenMap = array();

        // 256 is the minimum possible token number, as everything below
        // it is an ASCII value
        for ($i = 256; $i < 1000; ++$i) {
            if (T_DOUBLE_COLON === $i) {
                // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM
                $tokenMap[$i] = Tokens::T_PAAMAYIM_NEKUDOTAYIM;
            } elseif (T_OPEN_TAG_WITH_ECHO === $i) {
                // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
                $tokenMap[$i] = Tokens::T_ECHO;
            } elseif (T_CLOSE_TAG === $i) {
                // T_CLOSE_TAG is equivalent to ';'
                $tokenMap[$i] = ord(';');
            } elseif ('UNKNOWN' !== $name = token_name($i)) {
                if ('T_HASHBANG' === $name) {
                    // HHVM uses a special token for #! hashbang lines
                    $tokenMap[$i] = Tokens::T_INLINE_HTML;
                } elseif (defined($name = 'PhpParser\Parser\Tokens::' . $name)) {
                    // Other tokens can be mapped directly
                    $tokenMap[$i] = constant($name);
                }
            }
        }

        // HHVM uses a special token for numbers that overflow to double
        if (defined('T_ONUMBER')) {
            $tokenMap[T_ONUMBER] = Tokens::T_DNUMBER;
        }
        // HHVM also has a separate token for the __COMPILER_HALT_OFFSET__ constant
        if (defined('T_COMPILER_HALT_OFFSET')) {
            $tokenMap[T_COMPILER_HALT_OFFSET] = Tokens::T_STRING;
        }

        return $tokenMap;
    }
}