1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpParser\Lexer; |
4
|
|
|
|
5
|
|
|
use PhpParser\Parser\Tokens; |
6
|
|
|
|
7
|
|
|
/**
 * Lexer that emulates tokens and keywords of newer PHP versions (up to PHP 7.0)
 * when the running PHP version does not produce them itself.
 *
 * ATTENTION: This code is WRITE-ONLY. Do not try to read it.
 *
 * Overview of the approach:
 *  - Before tokenizing, unsupported operators are replaced in the source text by
 *    placeholder sequences of the form ~__EMU__{NAME}__~ or
 *    ~__EMU__{NAME}__{DATA}__~ (see preprocessCode()).
 *  - After tokenizing, placeholders that ended up as standalone tokens become
 *    the emulated tokens; placeholders that ended up inside other tokens (for
 *    example strings) are turned back into the original text (see
 *    postprocessTokens()).
 *  - Keywords unknown to the running PHP version arrive as T_STRING and are
 *    converted on the fly in getNextToken().
 *
 * NOTE: this class has to stay runnable on the old PHP versions it emulates
 * for, so it deliberately avoids newer language syntax.
 */
class Emulative extends \PhpParser\Lexer
{
    /**
     * @var array Map of lowercase keyword => parser token id for keywords
     *            that need to be emulated on the running PHP version
     */
    protected $newKeywords;

    /**
     * @var bool Whether the token returned last by getNextToken() was an
     *           object operator (->)
     */
    protected $inObjectAccess;

    // Token ids used inside $this->tokens for emulated tokens. The constructor
    // maps them to the parser's token ids through $this->tokenMap.
    const T_ELLIPSIS   = 1001;
    const T_POW        = 1002;
    const T_POW_EQUAL  = 1003;
    const T_COALESCE   = 1004;
    const T_SPACESHIP  = 1005;
    const T_YIELD_FROM = 1006;

    // Version thresholds: if PHP_VERSION is >= one of these, the emulation
    // belonging to that version is skipped.
    const PHP_7_0 = '7.0.0dev';
    const PHP_5_6 = '5.6.0rc1';
    const PHP_5_5 = '5.5.0beta1';

    /**
     * Sets up the keyword map and the token map entries required for emulation
     * on the running PHP version.
     *
     * @param array $options Options, passed on unchanged to the parent lexer
     */
    public function __construct(array $options = array()) {
        parent::__construct($options);

        // Must be ordered from the newest to the oldest version: the loop
        // below stops at the first version the running PHP already satisfies.
        $newKeywordsPerVersion = array(
            self::PHP_5_5 => array(
                'finally' => Tokens::T_FINALLY,
                'yield'   => Tokens::T_YIELD,
            ),
        );

        $this->newKeywords = array();
        foreach ($newKeywordsPerVersion as $version => $newKeywords) {
            if (version_compare(PHP_VERSION, $version, '>=')) {
                break;
            }

            $this->newKeywords += $newKeywords;
        }

        if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
            return;
        }
        $this->tokenMap[self::T_COALESCE]   = Tokens::T_COALESCE;
        $this->tokenMap[self::T_SPACESHIP]  = Tokens::T_SPACESHIP;
        $this->tokenMap[self::T_YIELD_FROM] = Tokens::T_YIELD_FROM;

        if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
            return;
        }
        $this->tokenMap[self::T_ELLIPSIS]  = Tokens::T_ELLIPSIS;
        $this->tokenMap[self::T_POW]       = Tokens::T_POW;
        $this->tokenMap[self::T_POW_EQUAL] = Tokens::T_POW_EQUAL;
    }

    /**
     * Starts lexing the given code, applying the emulation where necessary.
     *
     * @param string $code The source code to lex
     */
    public function startLexing($code) {
        $this->inObjectAccess = false;

        $preprocessedCode = $this->preprocessCode($code);
        parent::startLexing($preprocessedCode);

        // Only walk the token array if preprocessing actually rewrote something.
        if ($preprocessedCode !== $code) {
            $this->postprocessTokens();
        }

        // Set code property back to the original code, so __halt_compiler()
        // handling and (start|end)FilePos attributes use the correct offsets
        $this->code = $code;
    }

    /**
     * Replaces new features in the code by ~__EMU__{NAME}__{DATA}__~ sequences.
     *
     * ~LABEL~ is never valid PHP code, that's why we can (to some degree) safely
     * use it here. Later, when postprocessing the tokens, these sequences will
     * either be replaced by real tokens or replaced with their original content
     * (e.g. if they occurred inside a string, i.e. a place where they don't have
     * a special meaning).
     *
     * @param string $code The original source code
     *
     * @return string The code with unsupported operators replaced by placeholders;
     *                identical to $code when nothing needs to be emulated
     */
    protected function preprocessCode($code) {
        if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
            return $code;
        }

        $code = str_replace('??', '~__EMU__COALESCE__~', $code);
        $code = str_replace('<=>', '~__EMU__SPACESHIP__~', $code);
        // PHP keywords are case-insensitive, hence the "i" modifier. The matched
        // text is hex-encoded into the placeholder so that its exact spelling
        // and whitespace can be restored later.
        $code = preg_replace_callback('(yield[ \n\r\t]+from)i', function($matches) {
            return '~__EMU__YIELDFROM__' . bin2hex($matches[0]) . '__~';
        }, $code);

        if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
            return $code;
        }

        $code = str_replace('...', '~__EMU__ELLIPSIS__~', $code);
        // The lookarounds keep "/**" and "**/" (comment delimiters) untouched.
        // "**=" has to be handled before "**".
        $code = preg_replace('((?<!/)\*\*=)', '~__EMU__POWEQUAL__~', $code);
        $code = preg_replace('((?<!/)\*\*(?!/))', '~__EMU__POW__~', $code);

        return $code;
    }

    /**
     * Replaces the ~__EMU__...~ sequences with real tokens or their original
     * value.
     *
     * @throws \RuntimeException If a standalone placeholder has an unknown name
     *                           or a YIELDFROM placeholder has no valid payload
     */
    protected function postprocessTokens() {
        // we need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        for ($i = 0, $c = count($this->tokens); $i < $c; ++$i) {
            // first check that the following tokens are of form ~LABEL~,
            // then match the __EMU__... sequence.
            if ('~' === $this->tokens[$i]
                && isset($this->tokens[$i + 2])
                && '~' === $this->tokens[$i + 2]
                && T_STRING === $this->tokens[$i + 1][0]
                && preg_match('(^__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?$)', $this->tokens[$i + 1][1], $matches)
            ) {
                $name = $matches[1];
                $line = $this->tokens[$i + 1][2];
                $yieldFrom = 'YIELDFROM' === $name ? $this->decodeYieldFrom($matches) : null;

                if ('ELLIPSIS' === $name) {
                    $replace = array(array(self::T_ELLIPSIS, '...', $line));
                } else if ('POW' === $name) {
                    $replace = array(array(self::T_POW, '**', $line));
                } else if ('POWEQUAL' === $name) {
                    $replace = array(array(self::T_POW_EQUAL, '**=', $line));
                } else if ('COALESCE' === $name) {
                    $replace = array(array(self::T_COALESCE, '??', $line));
                } else if ('SPACESHIP' === $name) {
                    $replace = array(array(self::T_SPACESHIP, '<=>', $line));
                } else if (null !== $yieldFrom) {
                    // NOTE(review): the line is moved back by the number of
                    // newlines contained in the restored text; presumably this
                    // compensates for line counting done elsewhere - confirm
                    // against the parent lexer.
                    $replace = array(
                        array(self::T_YIELD_FROM, $yieldFrom, $line - substr_count($yieldFrom, "\n"))
                    );
                } else {
                    // Unknown name, or YIELDFROM without a decodable payload
                    throw new \RuntimeException('Invalid __EMU__ sequence');
                }

                // "~", T_STRING, "~" collapse into the replacement token(s)
                array_splice($this->tokens, $i, 3, $replace);
                $c -= 3 - count($replace);
            // for multichar tokens (e.g. strings) replace any ~__EMU__...~ sequences
            // in their content with the original character sequence. Checking for
            // the "~__EMU__" marker (rather than for the token not starting with
            // "__EMU__") makes sure text tokens that merely begin with "__EMU__"
            // are restored as well, and skips the regex for all other tokens.
            } elseif (is_array($this->tokens[$i])
                && false !== strpos($this->tokens[$i][1], '~__EMU__')
            ) {
                $this->tokens[$i][1] = preg_replace_callback(
                    '(~__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?~)',
                    array($this, 'restoreContentCallback'),
                    $this->tokens[$i][1]
                );
            }
        }
    }

    /**
     * This method is a callback for restoring EMU sequences in
     * multichar tokens (like strings) to their original value.
     *
     * @param array $matches Regex match: [0] full placeholder, [1] name,
     *                       [2] optional payload
     *
     * @return string The original text, or the placeholder itself if it is
     *                not recognized
     */
    public function restoreContentCallback(array $matches) {
        if ('ELLIPSIS' === $matches[1]) {
            return '...';
        } else if ('POW' === $matches[1]) {
            return '**';
        } else if ('POWEQUAL' === $matches[1]) {
            return '**=';
        } else if ('COALESCE' === $matches[1]) {
            return '??';
        } else if ('SPACESHIP' === $matches[1]) {
            return '<=>';
        } else if ('YIELDFROM' === $matches[1]) {
            $content = $this->decodeYieldFrom($matches);
            // A placeholder without decodable payload was not produced by
            // preprocessCode(); leave it as it is.
            return null !== $content ? $content : $matches[0];
        } else {
            return $matches[0];
        }
    }

    /**
     * Decodes the hex payload of a YIELDFROM placeholder.
     *
     * @param array $matches Regex match whose index 2 holds the payload, if any
     *
     * @return string|null The original "yield from" text, or null if the
     *                     payload is missing or not valid hex
     */
    private function decodeYieldFrom(array $matches) {
        if (!isset($matches[2]) || !preg_match('(^(?:[0-9A-Fa-f]{2})++$)', $matches[2])) {
            return null;
        }

        return hex2bin($matches[2]);
    }

    /**
     * Fetches the next token from the parent lexer and converts emulated
     * keywords to their token ids.
     *
     * @param mixed $value           Variable to store the token content in
     * @param mixed $startAttributes Variable to store the start attributes in
     * @param mixed $endAttributes   Variable to store the end attributes in
     *
     * @return int The token id
     */
    public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
        $token = parent::getNextToken($value, $startAttributes, $endAttributes);

        // replace new keywords by their respective tokens. This is not done
        // if we currently are in an object access (e.g. in $obj->namespace
        // "namespace" stays a T_STRING tokens and isn't converted to T_NAMESPACE)
        if (Tokens::T_STRING === $token && !$this->inObjectAccess) {
            $keyword = strtolower($value);
            if (isset($this->newKeywords[$keyword])) {
                return $this->newKeywords[$keyword];
            }
        } else {
            // keep track of whether we currently are in an object access (after ->)
            $this->inObjectAccess = Tokens::T_OBJECT_OPERATOR === $token;
        }

        return $token;
    }
}
206
|
|
|
|