|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace PhpParser\Lexer; |
|
4
|
|
|
|
|
5
|
|
|
use PhpParser\Parser\Tokens; |
|
6
|
|
|
|
|
7
|
|
|
/**
 * Lexer that emulates tokens of newer PHP versions when running on an older one.
 *
 * The source code is first rewritten so that new operators become
 * ~__EMU__{NAME}__{DATA}__~ placeholder sequences (see preprocessCode()). Once
 * parent::startLexing() has populated the token list, the placeholders are
 * either turned into real tokens or, where they ended up inside another token
 * such as a string, restored to their original text (see postprocessTokens()).
 * New keywords are mapped on the fly in getNextToken().
 *
 * ATTENTION: This code is WRITE-ONLY. Do not try to read it.
 */
class Emulative extends \PhpParser\Lexer
{
    /** @var array Map of lowercased keyword => parser token id, for keywords the running PHP lacks */
    protected $newKeywords;
    /** @var bool Whether the previously returned token was an object operator (->) */
    protected $inObjectAccess;

    // Lexer-level ids for the emulated tokens. They are mapped to the parser's
    // token ids through $this->tokenMap in the constructor.
    const T_ELLIPSIS   = 1001;
    const T_POW        = 1002;
    const T_POW_EQUAL  = 1003;
    const T_COALESCE   = 1004;
    const T_SPACESHIP  = 1005;
    const T_YIELD_FROM = 1006;

    // Version thresholds: when PHP_VERSION is at least the given version, the
    // emulation associated with that version is skipped.
    const PHP_7_0 = '7.0.0dev';
    const PHP_5_6 = '5.6.0rc1';
    const PHP_5_5 = '5.5.0beta1';

    /**
     * Placeholder name => array(lexer token id, original source text) for all
     * emulated operators whose text is fixed. YIELDFROM is not listed because
     * its original text (including whitespace) travels in the placeholder data.
     *
     * @var array
     */
    private static $operatorSequences = array(
        'ELLIPSIS'  => array(self::T_ELLIPSIS, '...'),
        'POW'       => array(self::T_POW, '**'),
        'POWEQUAL'  => array(self::T_POW_EQUAL, '**='),
        'COALESCE'  => array(self::T_COALESCE, '??'),
        'SPACESHIP' => array(self::T_SPACESHIP, '<=>'),
    );

    /**
     * Creates the lexer and registers the keyword and token mappings that the
     * running PHP version does not provide natively.
     *
     * @param array $options Options, passed unchanged to the parent constructor
     */
    public function __construct(array $options = array()) {
        parent::__construct($options);

        $newKeywordsPerVersion = array(
            self::PHP_5_5 => array(
                'finally' => Tokens::T_FINALLY,
                'yield'   => Tokens::T_YIELD,
            ),
        );

        // Iteration stops at the first version the runtime already satisfies, so
        // entries have to be listed from the newest version to the oldest.
        $this->newKeywords = array();
        foreach ($newKeywordsPerVersion as $version => $newKeywords) {
            if (version_compare(PHP_VERSION, $version, '>=')) {
                break;
            }

            $this->newKeywords += $newKeywords;
        }

        if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
            return;
        }
        $this->tokenMap[self::T_COALESCE]   = Tokens::T_COALESCE;
        $this->tokenMap[self::T_SPACESHIP]  = Tokens::T_SPACESHIP;
        $this->tokenMap[self::T_YIELD_FROM] = Tokens::T_YIELD_FROM;

        if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
            return;
        }
        $this->tokenMap[self::T_ELLIPSIS]  = Tokens::T_ELLIPSIS;
        $this->tokenMap[self::T_POW]       = Tokens::T_POW;
        $this->tokenMap[self::T_POW_EQUAL] = Tokens::T_POW_EQUAL;
    }

    /**
     * Starts lexing the given code, emulating newer syntax where necessary.
     *
     * @param string $code The source code to lex
     */
    public function startLexing($code) {
        $this->inObjectAccess = false;

        $preprocessedCode = $this->preprocessCode($code);
        parent::startLexing($preprocessedCode);
        // Placeholders can only exist if preprocessing actually changed something.
        if ($preprocessedCode !== $code) {
            $this->postprocessTokens();
        }

        // Set code property back to the original code, so __halt_compiler()
        // handling and (start|end)FilePos attributes use the correct offsets
        $this->code = $code;
    }

    /**
     * Replaces new features in the code by ~__EMU__{NAME}__{DATA}__~ sequences.
     *
     * ~LABEL~ is never valid PHP code, that's why we can (to some degree) safely
     * use it here. Later, when postprocessing the tokens, these sequences will
     * either be replaced by real tokens or replaced with their original content
     * (e.g. if they occurred inside a string, i.e. a place where they don't have
     * a special meaning).
     *
     * @param string $code The original source code
     *
     * @return string The code with emulated operators replaced by placeholders;
     *                identical to $code when nothing needs emulation
     */
    protected function preprocessCode($code) {
        if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
            return $code;
        }

        $code = str_replace('??', '~__EMU__COALESCE__~', $code);
        $code = str_replace('<=>', '~__EMU__SPACESHIP__~', $code);
        // Keywords are case-insensitive, hence the "i" modifier. The lookarounds
        // keep the match from starting or ending inside a longer identifier, so
        // that e.g. "yield fromage()" is left alone.
        $code = preg_replace_callback(
            '((?<![a-zA-Z0-9_\x80-\xff])yield[ \n\r\t]+from(?![a-zA-Z0-9_\x80-\xff]))i',
            function($matches) {
                // Encoding $0 in order to preserve exact whitespace
                return '~__EMU__YIELDFROM__' . bin2hex($matches[0]) . '__~';
            },
            $code
        );

        if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
            return $code;
        }

        $code = str_replace('...', '~__EMU__ELLIPSIS__~', $code);
        // "**=" has to be handled before "**", otherwise it would be consumed as
        // "**" followed by "=". The lookarounds exclude a directly adjacent "/",
        // presumably so that "/**" and "**/" comment delimiters stay intact.
        $code = preg_replace('((?<!/)\*\*=)', '~__EMU__POWEQUAL__~', $code);
        $code = preg_replace('((?<!/)\*\*(?!/))', '~__EMU__POW__~', $code);

        return $code;
    }

    /**
     * Replaces the ~__EMU__...~ sequences with real tokens or their original
     * value.
     *
     * @throws \RuntimeException If a ~LABEL~ token triple carries an unknown
     *                           placeholder name or undecodable placeholder data
     */
    protected function postprocessTokens() {
        // we need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        for ($i = 0, $c = count($this->tokens); $i < $c; ++$i) {
            // first check that the following tokens are of form ~LABEL~,
            // then match the __EMU__... sequence.
            if ('~' === $this->tokens[$i]
                && isset($this->tokens[$i + 2])
                && '~' === $this->tokens[$i + 2]
                && T_STRING === $this->tokens[$i + 1][0]
                && preg_match('(^__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?$)', $this->tokens[$i + 1][1], $matches)
            ) {
                $name = $matches[1];
                $line = $this->tokens[$i + 1][2];

                if (isset(self::$operatorSequences[$name])) {
                    list($id, $text) = self::$operatorSequences[$name];
                    $replace = array(
                        array($id, $text, $line)
                    );
                } elseif ('YIELDFROM' === $name
                    && null !== ($content = $this->decodeYieldFromData($matches))
                ) {
                    // NOTE(review): the recorded line is the placeholder's line minus
                    // the number of newlines in the original text - confirm intent
                    // against the parent lexer's line accounting.
                    $replace = array(
                        array(self::T_YIELD_FROM, $content, $line - substr_count($content, "\n"))
                    );
                } else {
                    throw new \RuntimeException('Invalid __EMU__ sequence');
                }

                // Collapse the three tokens (~, LABEL, ~) into the replacement.
                array_splice($this->tokens, $i, 3, $replace);
                $c -= 3 - count($replace);
            // for multichar tokens (e.g. strings) replace any ~__EMU__...~ sequences
            // in their content with the original character sequence
            } elseif (is_array($this->tokens[$i])
                && false !== strpos($this->tokens[$i][1], '~__EMU__')
            ) {
                $this->tokens[$i][1] = preg_replace_callback(
                    '(~__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?~)',
                    array($this, 'restoreContentCallback'),
                    $this->tokens[$i][1]
                );
            }
        }
    }

    /**
     * This method is a callback for restoring EMU sequences in
     * multichar tokens (like strings) to their original value.
     *
     * @param array $matches Regex match: [0] whole sequence, [1] placeholder
     *                       name, [2] optional placeholder data
     *
     * @return string The original text for a known sequence; the matched text
     *                itself if the name is unknown or the data cannot be decoded
     */
    public function restoreContentCallback(array $matches) {
        $name = $matches[1];

        if (isset(self::$operatorSequences[$name])) {
            return self::$operatorSequences[$name][1];
        }

        if ('YIELDFROM' === $name) {
            $content = $this->decodeYieldFromData($matches);
            if (null !== $content) {
                return $content;
            }
        }

        // Not something preprocessCode() could have produced: leave it untouched.
        return $matches[0];
    }

    /**
     * Decodes the hex-encoded original text carried by a YIELDFROM placeholder.
     *
     * @param array $matches Regex match whose index 2, if present, holds the data
     *
     * @return string|null The decoded text, or null when the data part is missing
     *                     or is not an even-length hexadecimal string
     */
    private function decodeYieldFromData(array $matches) {
        if (!isset($matches[2]) || !preg_match('(^(?:[0-9A-Fa-f]{2})++$)', $matches[2])) {
            return null;
        }

        return hex2bin($matches[2]);
    }

    /**
     * Fetches the next token from the parent lexer, converting emulated keywords.
     *
     * @param mixed $value           Receives the token value (filled in by the parent)
     * @param mixed $startAttributes Receives the start attributes (filled in by the parent)
     * @param mixed $endAttributes   Receives the end attributes (filled in by the parent)
     *
     * @return int The token id, with T_STRING replaced by the keyword's token id
     *             where an emulated keyword was recognised
     */
    public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
        $token = parent::getNextToken($value, $startAttributes, $endAttributes);

        // replace new keywords by their respective tokens. This is not done
        // if we currently are in an object access (e.g. in $obj->namespace
        // "namespace" stays a T_STRING token and isn't converted to T_NAMESPACE)
        if (Tokens::T_STRING === $token && !$this->inObjectAccess) {
            $keyword = strtolower($value);
            if (isset($this->newKeywords[$keyword])) {
                return $this->newKeywords[$keyword];
            }
        } else {
            // keep track of whether we currently are in an object access (after ->)
            $this->inObjectAccess = Tokens::T_OBJECT_OPERATOR === $token;
        }

        return $token;
    }
}
|
206
|
|
|
|