Emulative::preprocessCode()   A
last analyzed

Complexity

Conditions 3
Paths 3

Size

Total Lines 22
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 4
Bugs 0 Features 3
Metric Value
c 4
b 0
f 3
dl 0
loc 22
rs 9.2
cc 3
eloc 14
nc 3
nop 1
1
<?php
2
3
namespace PhpParser\Lexer;
4
5
use PhpParser\Parser\Tokens;
6
7
/**
 * Lexer that emulates operators and keywords of newer PHP versions (5.5, 5.6
 * and 7.0) when the running PHP version does not know them yet.
 *
 * New operators are swapped for ~__EMU__{NAME}__{DATA}__~ placeholders before
 * the code is passed to the parent lexer; afterwards the placeholders are
 * turned back into real tokens or into their original text.
 *
 * ATTENTION: This code is WRITE-ONLY. Do not try to read it.
 */
class Emulative extends \PhpParser\Lexer
{
    protected $newKeywords;
    protected $inObjectAccess;

    const T_ELLIPSIS   = 1001;
    const T_POW        = 1002;
    const T_POW_EQUAL  = 1003;
    const T_COALESCE   = 1004;
    const T_SPACESHIP  = 1005;
    const T_YIELD_FROM = 1006;

    const PHP_7_0 = '7.0.0dev';
    const PHP_5_6 = '5.6.0rc1';
    const PHP_5_5 = '5.5.0beta1';

    /*
     * Placeholder name => array(emulated token id, original source text) for
     * every emulated operator that carries no data payload. YIELDFROM is not
     * listed because its original text is stored hex-encoded in the placeholder.
     */
    private static $emulatedOperators = array(
        'ELLIPSIS'  => array(self::T_ELLIPSIS, '...'),
        'POW'       => array(self::T_POW, '**'),
        'POWEQUAL'  => array(self::T_POW_EQUAL, '**='),
        'COALESCE'  => array(self::T_COALESCE, '??'),
        'SPACESHIP' => array(self::T_SPACESHIP, '<=>'),
    );

    /**
     * Collects the keywords and registers the token ids that have to be
     * emulated on the running PHP version.
     *
     * @param array $options Passed through unchanged to the parent constructor
     */
    public function __construct(array $options = array()) {
        parent::__construct($options);

        // Keywords grouped by the PHP version that introduced them.
        $newKeywordsPerVersion = array(
            self::PHP_5_5 => array(
                'finally'       => Tokens::T_FINALLY,
                'yield'         => Tokens::T_YIELD,
            ),
        );

        $this->newKeywords = array();
        foreach ($newKeywordsPerVersion as $version => $newKeywords) {
            // Stop at the first version the runtime already supports; only
            // keywords of the versions listed before it are emulated.
            if (version_compare(PHP_VERSION, $version, '>=')) {
                break;
            }

            $this->newKeywords += $newKeywords;
        }

        // Nothing below needs emulation on PHP 7.0 or newer.
        if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
            return;
        }
        // tokenMap is not declared in this class; presumably it is the parent
        // lexer's map from lexer token ids to parser token ids.
        $this->tokenMap[self::T_COALESCE]   = Tokens::T_COALESCE;
        $this->tokenMap[self::T_SPACESHIP]  = Tokens::T_SPACESHIP;
        $this->tokenMap[self::T_YIELD_FROM] = Tokens::T_YIELD_FROM;

        // The remaining tokens are only missing before PHP 5.6.
        if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
            return;
        }
        $this->tokenMap[self::T_ELLIPSIS]  = Tokens::T_ELLIPSIS;
        $this->tokenMap[self::T_POW]       = Tokens::T_POW;
        $this->tokenMap[self::T_POW_EQUAL] = Tokens::T_POW_EQUAL;
    }

    /**
     * Lexes the preprocessed form of $code and, if preprocessing changed
     * anything, converts the placeholders in the resulting tokens back.
     *
     * @param string $code The source code to lex
     */
    public function startLexing($code) {
        $this->inObjectAccess = false;

        $preprocessedCode = $this->preprocessCode($code);
        parent::startLexing($preprocessedCode);
        if ($preprocessedCode !== $code) {
            $this->postprocessTokens();
        }

        // Set code property back to the original code, so __halt_compiler()
        // handling and (start|end)FilePos attributes use the correct offsets
        $this->code = $code;
    }

    /**
     * Replaces new features in the code by ~__EMU__{NAME}__{DATA}__~ sequences.
     * ~LABEL~ is never valid PHP code, that's why we can (to some degree) safely
     * use it here.
     * Later, when postprocessing the tokens, these sequences are either replaced
     * by real tokens or by their original content (e.g. if they occurred inside
     * a string, i.e. a place where they don't have a special meaning).
     *
     * @param string $code The original source code
     *
     * @return string The code with placeholders inserted; returned unchanged on
     *                PHP 7.0 or newer
     */
    protected function preprocessCode($code) {
        if (version_compare(PHP_VERSION, self::PHP_7_0, '>=')) {
            return $code;
        }

        $code = str_replace('??', '~__EMU__COALESCE__~', $code);
        $code = str_replace('<=>', '~__EMU__SPACESHIP__~', $code);

        // Keywords are case-insensitive, hence the "i" modifier. The lookaround
        // assertions keep the match from starting or ending inside a longer
        // label, so neither "myyield from" nor "yield fromFoo()" is rewritten.
        $code = preg_replace_callback(
            '((?<![a-zA-Z0-9_\x80-\xff$])yield[ \n\r\t]+from(?![a-zA-Z0-9_\x80-\xff]))i',
            function($matches) {
                // Encoding $0 in order to preserve exact whitespace and casing
                return '~__EMU__YIELDFROM__' . bin2hex($matches[0]) . '__~';
            },
            $code
        );

        if (version_compare(PHP_VERSION, self::PHP_5_6, '>=')) {
            return $code;
        }

        $code = str_replace('...', '~__EMU__ELLIPSIS__~', $code);
        // The assertions on "/" leave doc comment delimiters untouched.
        $code = preg_replace('((?<!/)\*\*=)', '~__EMU__POWEQUAL__~', $code);
        $code = preg_replace('((?<!/)\*\*(?!/))', '~__EMU__POW__~', $code);

        return $code;
    }

    /**
     * Replaces the ~__EMU__...~ sequences with real tokens or their original
     * value.
     *
     * @throws \RuntimeException If a ~LABEL~ token triple looks like an emulation
     *                           sequence but has an unknown name or a broken payload
     */
    protected function postprocessTokens() {
        // we need to manually iterate and manage a count because we'll change
        // the tokens array on the way
        for ($i = 0, $c = count($this->tokens); $i < $c; ++$i) {
            // first check that the following tokens are of form ~LABEL~,
            // then match the __EMU__... sequence.
            if ('~' === $this->tokens[$i]
                && isset($this->tokens[$i + 2])
                && '~' === $this->tokens[$i + 2]
                && T_STRING === $this->tokens[$i + 1][0]
                && preg_match('(^__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?$)', $this->tokens[$i + 1][1], $matches)
            ) {
                $name = $matches[1];
                $line = $this->tokens[$i + 1][2];

                if (isset(self::$emulatedOperators[$name])) {
                    list($id, $text) = self::$emulatedOperators[$name];
                    $replace = array(
                        array($id, $text, $line)
                    );
                } elseif ('YIELDFROM' === $name
                          && null !== ($content = $this->decodeYieldFrom($matches))
                ) {
                    $replace = array(
                        array(self::T_YIELD_FROM, $content, $line - substr_count($content, "\n"))
                    );
                } else {
                    throw new \RuntimeException('Invalid __EMU__ sequence');
                }

                // collapse the three tokens "~", LABEL, "~" into the real token
                array_splice($this->tokens, $i, 3, $replace);
                $c -= 3 - count($replace);
            // for multichar tokens (e.g. strings) replace any ~__EMU__...~ sequences
            // in their content with the original character sequence. Testing for
            // the full "~__EMU__" prefix anywhere in the text also covers tokens
            // whose content merely starts with "__EMU__".
            } elseif (is_array($this->tokens[$i])
                      && false !== strpos($this->tokens[$i][1], '~__EMU__')
            ) {
                $this->tokens[$i][1] = preg_replace_callback(
                    '(~__EMU__([A-Z]++)__(?:([A-Za-z0-9]++)__)?~)',
                    array($this, 'restoreContentCallback'),
                    $this->tokens[$i][1]
                );
            }
        }
    }

    /**
     * This method is a callback for restoring EMU sequences in
     * multichar tokens (like strings) to their original value.
     *
     * @param array $matches Regex match: [0] full sequence, [1] name, [2] optional data
     *
     * @return string The original text, or the unchanged sequence if it is not
     *                a valid emulation sequence
     */
    public function restoreContentCallback(array $matches) {
        $name = $matches[1];

        if (isset(self::$emulatedOperators[$name])) {
            return self::$emulatedOperators[$name][1];
        }

        if ('YIELDFROM' === $name) {
            $content = $this->decodeYieldFrom($matches);
            if (null !== $content) {
                return $content;
            }
        }

        return $matches[0];
    }

    /**
     * Decodes the hex payload of a YIELDFROM sequence.
     *
     * @param array $matches Regex match with the payload expected at index 2
     *
     * @return string|null The original "yield from" text, or null if the payload
     *                     is missing or is not an even-length hex string
     */
    private function decodeYieldFrom(array $matches) {
        if (!isset($matches[2]) || !preg_match('(^(?:[0-9a-fA-F]{2})++$)', $matches[2])) {
            return null;
        }

        return hex2bin($matches[2]);
    }

    /**
     * Fetches the next token from the parent lexer and converts emulated
     * keywords from T_STRING to their dedicated token.
     *
     * @param mixed $value           Receives the token value (by reference)
     * @param mixed $startAttributes Receives the start attributes (by reference)
     * @param mixed $endAttributes   Receives the end attributes (by reference)
     *
     * @return int The token id
     */
    public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
        $token = parent::getNextToken($value, $startAttributes, $endAttributes);

        // replace new keywords by their respective tokens. This is not done
        // if we currently are in an object access (e.g. in $obj->namespace
        // "namespace" stays a T_STRING tokens and isn't converted to T_NAMESPACE)
        if (Tokens::T_STRING === $token && !$this->inObjectAccess) {
            $keyword = strtolower($value);
            if (isset($this->newKeywords[$keyword])) {
                return $this->newKeywords[$keyword];
            }
        } else {
            // keep track of whether we currently are in an object access (after ->)
            $this->inObjectAccess = Tokens::T_OBJECT_OPERATOR === $token;
        }

        return $token;
    }
}
206