Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Lexer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Lexer, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
7 | class Lexer |
||
8 | { |
||
9 | protected $code; |
||
10 | protected $tokens; |
||
11 | protected $pos; |
||
12 | protected $line; |
||
13 | protected $filePos; |
||
14 | |||
15 | protected $tokenMap; |
||
16 | protected $dropTokens; |
||
17 | |||
18 | protected $usedAttributes; |
||
19 | |||
/**
 * Builds a lexer and prepares its lookup tables.
 *
 * @param array $options Lexer options. Only the 'usedAttributes' key is read here: a list
 *                       of attribute names to attach to the AST nodes. Recognised names are
 *                       'comments', 'startLine', 'endLine', 'startTokenPos', 'endTokenPos',
 *                       'startFilePos' and 'endFilePos'. When the key is absent the first
 *                       three are used. See getNextToken() for what each attribute holds.
 */
public function __construct(array $options = array()) {
    // Translation table from PHP's internal token ids to the parser's token ids.
    $this->tokenMap = $this->createTokenMap();

    // Tokens that are skipped while lexing. Only key presence is ever tested (isset),
    // so the stored value 1 is just a placeholder.
    $this->dropTokens = array(
        T_WHITESPACE => 1,
        T_OPEN_TAG   => 1,
    );

    // Use the default attribute list only when the caller passed no 'usedAttributes'
    // key at all; an explicitly supplied value is taken as-is.
    if (array_key_exists('usedAttributes', $options)) {
        $attributeNames = $options['usedAttributes'];
    } else {
        $attributeNames = array('comments', 'startLine', 'endLine');
    }

    // Turn the list into a name => true map so later checks are a plain isset().
    $this->usedAttributes = array_fill_keys($attributeNames, true);
}
||
44 | |||
/**
 * Initializes the lexer for lexing the provided source code.
 *
 * Tokenizes the code with token_get_all(), stores the resulting token array and
 * resets the token position, line counter and file offset to their start values.
 * The 'xdebug.scream' ini setting is switched off for the duration of the
 * tokenization and restored afterwards, including when lexing fails.
 *
 * @param string $code The source code to lex
 *
 * @throws Error on lexing errors (unterminated comment or unexpected character)
 */
public function startLexing($code) {
    // token_get_all() is called with the "@" operator below; xdebug.scream is turned
    // off so that the suppression stays effective. ini_set() returns the previous
    // value, or false if the setting could not be changed.
    $scream = ini_set('xdebug.scream', '0');

    try {
        $this->resetErrors();
        $this->tokens = @token_get_all($code);
        $this->handleErrors();
    } catch (\Exception $e) {
        // Put the caller's setting back before the error propagates; otherwise a
        // failed lex would leave xdebug.scream disabled for the rest of the request.
        if (false !== $scream) {
            ini_set('xdebug.scream', $scream);
        }

        throw $e;
    }

    if (false !== $scream) {
        ini_set('xdebug.scream', $scream);
    }

    $this->code = $code; // keep the code around for __halt_compiler() handling
    $this->pos = -1;     // getNextToken() pre-increments, so the first token read is index 0
    $this->line = 1;
    $this->filePos = 0;
}
||
68 | |||
69 | protected function resetErrors() { |
||
79 | |||
80 | protected function handleErrors() { |
||
108 | |||
/**
 * Fetches the next token.
 *
 * Comments and tokens listed in the drop map are consumed silently; the method only
 * returns once it has reached a token that is passed on to the parser (or the end of
 * the token array, which is reported as token id 0).
 *
 * The available attributes are determined by the 'usedAttributes' option, which can
 * be specified in the constructor. The following attributes are supported:
 *
 *  * 'comments'      => Array of PhpParser\Comment or PhpParser\Comment\Doc instances,
 *                       representing all comments that occurred between the previous
 *                       non-discarded token and the current one.
 *  * 'startLine'     => Line in which the node starts.
 *  * 'endLine'       => Line in which the node ends.
 *  * 'startTokenPos' => Offset into the token array of the first token in the node.
 *  * 'endTokenPos'   => Offset into the token array of the last token in the node.
 *  * 'startFilePos'  => Offset into the code string of the first character that is part of the node.
 *  * 'endFilePos'    => Offset into the code string of the last character that is part of the node.
 *
 * @param mixed $value           Variable to store token content in
 * @param mixed $startAttributes Variable to store start attributes in
 * @param mixed $endAttributes   Variable to store end attributes in
 *
 * @return int Token id
 */
public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
    $startAttributes = array();
    $endAttributes   = array();

    while (true) {
        if (isset($this->tokens[++$this->pos])) {
            $token = $this->tokens[$this->pos];
        } else {
            // EOF token with ID 0
            $token = "\0";
        }

        // The start positions are rewritten on every iteration, so after skipped
        // tokens they describe the token that is finally returned.
        if (isset($this->usedAttributes['startTokenPos'])) {
            $startAttributes['startTokenPos'] = $this->pos;
        }
        if (isset($this->usedAttributes['startFilePos'])) {
            $startAttributes['startFilePos'] = $this->filePos;
        }

        if (is_string($token)) {
            // Plain string tokens carry no line information of their own, so the
            // lexer's running line counter is used as their start line.
            $startLine = $this->line;

            if ('b"' === $token) {
                // bug in token_get_all: the binary-string prefix arrives glued to the
                // quote; it is reported under the id of the double quote
                $value = 'b"';
                $this->filePos += 2;
                $id = ord('"');
            } else {
                $value = $token;
                $this->filePos += 1;
                $id = ord($token);
            }
        } else {
            // Array token: [0] => token id, [1] => text, [2] => line it starts on.
            $this->line += substr_count($token[1], "\n");
            $this->filePos += strlen($token[1]);

            $isDocComment = T_DOC_COMMENT === $token[0];
            if ($isDocComment || T_COMMENT === $token[0]) {
                // Comments are never returned; they are attached to the next
                // returned token when the 'comments' attribute is enabled.
                if (isset($this->usedAttributes['comments'])) {
                    $startAttributes['comments'][] = $isDocComment
                        ? new Comment\Doc($token[1], $token[2])
                        : new Comment($token[1], $token[2]);
                }
                continue;
            }

            if (isset($this->dropTokens[$token[0]])) {
                continue;
            }

            $value     = $token[1];
            $startLine = $token[2];
            $id        = $this->tokenMap[$token[0]];
        }

        // Shared tail for every returned token: line, token-offset and file-offset
        // attributes are filled in the same way for string and array tokens.
        if (isset($this->usedAttributes['startLine'])) {
            $startAttributes['startLine'] = $startLine;
        }
        if (isset($this->usedAttributes['endLine'])) {
            $endAttributes['endLine'] = $this->line;
        }
        if (isset($this->usedAttributes['endTokenPos'])) {
            $endAttributes['endTokenPos'] = $this->pos;
        }
        if (isset($this->usedAttributes['endFilePos'])) {
            // filePos already points past the token, hence the -1
            $endAttributes['endFilePos'] = $this->filePos - 1;
        }

        return $id;
    }
}
||
211 | |||
/**
 * Gives access to the raw token array of the code currently being lexed.
 *
 * The array has the format produced by token_get_all() and nothing has been
 * removed from it (whitespace and comments are still present). The token
 * position attributes refer to offsets in this array.
 *
 * @return array Tokens in token_get_all() format
 */
public function getTokens() {
    return $this->tokens;
}
||
225 | |||
226 | /** |
||
227 | * Handles __halt_compiler() by returning the text after it. |
||
228 | * |
||
229 | * @return string Remaining text |
||
230 | */ |
||
231 | public function handleHaltCompiler() { |
||
248 | |||
249 | /** |
||
250 | * Creates the token map. |
||
251 | * |
||
252 | * The token map maps the PHP internal token identifiers |
||
253 | * to the identifiers used by the Parser. Additionally it |
||
254 | * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'. |
||
255 | * |
||
256 | * @return array The token map |
||
257 | */ |
||
258 | protected function createTokenMap() { |
||
295 | } |
||
296 |