Duplicate code is one of the most pungent code smells. A common rule of thumb is to restructure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like Lexer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Lexer and, based on these observations, to apply Extract Interface as well.
| 1 | <?php |
||
| 7 | class Lexer |
||
| 8 | { |
||
| 9 | protected $code; |
||
| 10 | protected $tokens; |
||
| 11 | protected $pos; |
||
| 12 | protected $line; |
||
| 13 | protected $filePos; |
||
| 14 | |||
| 15 | protected $tokenMap; |
||
| 16 | protected $dropTokens; |
||
| 17 | |||
| 18 | protected $usedAttributes; |
||
| 19 | |||
| 20 | /** |
||
| 21 | * Creates a Lexer. |
||
| 22 | * |
||
| 23 | * @param array $options Options array. Currently only the 'usedAttributes' option is supported, |
||
| 24 | * which is an array of attributes to add to the AST nodes. Possible attributes |
||
| 25 | * are: 'comments', 'startLine', 'endLine', 'startTokenPos', 'endTokenPos', |
||
| 26 | * 'startFilePos', 'endFilePos'. The option defaults to the first three. |
||
| 27 | * For more info see getNextToken() docs. |
||
| 28 | */ |
||
| 29 | public function __construct(array $options = array()) { |
||
| 30 | // map from internal tokens to PhpParser tokens |
||
| 31 | $this->tokenMap = $this->createTokenMap(); |
||
| 32 | |||
| 33 | // map of tokens to drop while lexing (the map is only used for isset lookup, |
||
| 34 | // that's why the value is simply set to 1; the value is never actually used.) |
||
| 35 | $this->dropTokens = array_fill_keys(array(T_WHITESPACE, T_OPEN_TAG), 1); |
||
| 36 | |||
| 37 | // the usedAttributes member is a map of the used attribute names to a dummy |
||
| 38 | // value (here "true") |
||
| 39 | $options += array( |
||
| 40 | 'usedAttributes' => array('comments', 'startLine', 'endLine'), |
||
| 41 | ); |
||
| 42 | $this->usedAttributes = array_fill_keys($options['usedAttributes'], true); |
||
| 43 | } |
||
| 44 | |||
| 45 | /** |
||
| 46 | * Initializes the lexer for lexing the provided source code. |
||
| 47 | * |
||
| 48 | * @param string $code The source code to lex |
||
| 49 | * |
||
| 50 | * @throws Error on lexing errors (unterminated comment or unexpected character) |
||
| 51 | */ |
||
| 52 | public function startLexing($code) { |
||
| 53 | $scream = ini_set('xdebug.scream', '0'); |
||
| 54 | |||
| 55 | $this->resetErrors(); |
||
| 56 | $this->tokens = @token_get_all($code); |
||
| 57 | $this->handleErrors(); |
||
| 58 | |||
| 59 | if (false !== $scream) { |
||
| 60 | ini_set('xdebug.scream', $scream); |
||
| 61 | } |
||
| 62 | |||
| 63 | $this->code = $code; // keep the code around for __halt_compiler() handling |
||
| 64 | $this->pos = -1; |
||
| 65 | $this->line = 1; |
||
| 66 | $this->filePos = 0; |
||
| 67 | } |
||
| 68 | |||
| 69 | protected function resetErrors() { |
||
| 79 | |||
| 80 | protected function handleErrors() { |
||
| 108 | |||
| 109 | /** |
||
| 110 | * Fetches the next token. |
||
| 111 | * |
||
| 112 | * The available attributes are determined by the 'usedAttributes' option, which can |
||
| 113 | * be specified in the constructor. The following attributes are supported: |
||
| 114 | * |
||
| 115 | * * 'comments' => Array of PhpParser\Comment or PhpParser\Comment\Doc instances, |
||
| 116 | * representing all comments that occurred between the previous |
||
| 117 | * non-discarded token and the current one. |
||
| 118 | * * 'startLine' => Line in which the node starts. |
||
| 119 | * * 'endLine' => Line in which the node ends. |
||
| 120 | * * 'startTokenPos' => Offset into the token array of the first token in the node. |
||
| 121 | * * 'endTokenPos' => Offset into the token array of the last token in the node. |
||
| 122 | * * 'startFilePos' => Offset into the code string of the first character that is part of the node. |
||
| 123 | * * 'endFilePos' => Offset into the code string of the last character that is part of the node |
||
| 124 | * |
||
| 125 | * @param mixed $value Variable to store token content in |
||
| 126 | * @param mixed $startAttributes Variable to store start attributes in |
||
| 127 | * @param mixed $endAttributes Variable to store end attributes in |
||
| 128 | * |
||
| 129 | * @return int Token id |
||
| 130 | */ |
||
| 131 | public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) { |
||
| 132 | $startAttributes = array(); |
||
| 133 | $endAttributes = array(); |
||
| 134 | |||
| 135 | while (1) { |
||
| 136 | if (isset($this->tokens[++$this->pos])) { |
||
| 137 | $token = $this->tokens[$this->pos]; |
||
| 138 | } else { |
||
| 139 | // EOF token with ID 0 |
||
| 140 | $token = "\0"; |
||
| 141 | } |
||
| 142 | |||
| 143 | if (isset($this->usedAttributes['startTokenPos'])) { |
||
| 144 | $startAttributes['startTokenPos'] = $this->pos; |
||
| 145 | } |
||
| 146 | if (isset($this->usedAttributes['startFilePos'])) { |
||
| 147 | $startAttributes['startFilePos'] = $this->filePos; |
||
| 148 | } |
||
| 149 | |||
| 150 | if (is_string($token)) { |
||
| 151 | // bug in token_get_all |
||
| 152 | if ('b"' === $token) { |
||
| 153 | $value = 'b"'; |
||
| 154 | $this->filePos += 2; |
||
| 155 | $id = ord('"'); |
||
| 156 | } else { |
||
| 157 | $value = $token; |
||
| 158 | $this->filePos += 1; |
||
| 159 | $id = ord($token); |
||
| 160 | } |
||
| 161 | |||
| 162 | if (isset($this->usedAttributes['startLine'])) { |
||
| 163 | $startAttributes['startLine'] = $this->line; |
||
| 164 | } |
||
| 165 | if (isset($this->usedAttributes['endLine'])) { |
||
| 166 | $endAttributes['endLine'] = $this->line; |
||
| 167 | } |
||
| 168 | if (isset($this->usedAttributes['endTokenPos'])) { |
||
| 169 | $endAttributes['endTokenPos'] = $this->pos; |
||
| 170 | } |
||
| 171 | if (isset($this->usedAttributes['endFilePos'])) { |
||
| 172 | $endAttributes['endFilePos'] = $this->filePos - 1; |
||
| 173 | } |
||
| 174 | |||
| 175 | return $id; |
||
| 176 | } else { |
||
| 177 | $this->line += substr_count($token[1], "\n"); |
||
| 178 | $this->filePos += strlen($token[1]); |
||
| 179 | |||
| 180 | if (T_COMMENT === $token[0]) { |
||
| 181 | if (isset($this->usedAttributes['comments'])) { |
||
| 182 | $startAttributes['comments'][] = new Comment($token[1], $token[2]); |
||
| 183 | } |
||
| 184 | } elseif (T_DOC_COMMENT === $token[0]) { |
||
| 185 | if (isset($this->usedAttributes['comments'])) { |
||
| 186 | $startAttributes['comments'][] = new Comment\Doc($token[1], $token[2]); |
||
| 187 | } |
||
| 188 | } elseif (!isset($this->dropTokens[$token[0]])) { |
||
| 189 | $value = $token[1]; |
||
| 190 | |||
| 191 | if (isset($this->usedAttributes['startLine'])) { |
||
| 192 | $startAttributes['startLine'] = $token[2]; |
||
| 193 | } |
||
| 194 | if (isset($this->usedAttributes['endLine'])) { |
||
| 195 | $endAttributes['endLine'] = $this->line; |
||
| 196 | } |
||
| 197 | if (isset($this->usedAttributes['endTokenPos'])) { |
||
| 198 | $endAttributes['endTokenPos'] = $this->pos; |
||
| 199 | } |
||
| 200 | if (isset($this->usedAttributes['endFilePos'])) { |
||
| 201 | $endAttributes['endFilePos'] = $this->filePos - 1; |
||
| 202 | } |
||
| 203 | |||
| 204 | return $this->tokenMap[$token[0]]; |
||
| 205 | } |
||
| 206 | } |
||
| 207 | } |
||
| 208 | |||
| 209 | throw new \RuntimeException('Reached end of lexer loop'); |
||
| 210 | } |
||
| 211 | |||
| 212 | /** |
||
| 213 | * Returns the token array for current code. |
||
| 214 | * |
||
| 215 | * The token array is in the same format as provided by the |
||
| 216 | * token_get_all() function and does not discard tokens (i.e. |
||
| 217 | * whitespace and comments are included). The token position |
||
| 218 | * attributes are against this token array. |
||
| 219 | * |
||
| 220 | * @return array Array of tokens in token_get_all() format |
||
| 221 | */ |
||
| 222 | public function getTokens() { |
||
| 223 | return $this->tokens; |
||
| 224 | } |
||
| 225 | |||
| 226 | /** |
||
| 227 | * Handles __halt_compiler() by returning the text after it. |
||
| 228 | * |
||
| 229 | * @return string Remaining text |
||
| 230 | */ |
||
| 231 | public function handleHaltCompiler() { |
||
| 248 | |||
| 249 | /** |
||
| 250 | * Creates the token map. |
||
| 251 | * |
||
| 252 | * The token map maps the PHP internal token identifiers |
||
| 253 | * to the identifiers used by the Parser. Additionally it |
||
| 254 | * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'. |
||
| 255 | * |
||
| 256 | * @return array The token map |
||
| 257 | */ |
||
| 258 | protected function createTokenMap() { |
||
| 295 | } |
||
| 296 |