Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like TokenStream often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields or methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use TokenStream, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 13 | class TokenStream implements ArrayAccess, Iterator |
||
| 14 | { |
||
| 15 | /** |
||
| 16 | * @var integer Number of tokens |
||
| 17 | */ |
||
| 18 | protected $cnt; |
||
| 19 | |||
| 20 | /** |
||
| 21 | * @var string[] List of exact triplet of tokens to exclude from minification |
||
| 22 | */ |
||
| 23 | protected $excludeExact = [ |
||
| 24 | // 1 - - 1 and 1 + + 1 should not become 1--1 or 1++1 |
||
| 25 | '- -', |
||
| 26 | '+ +', |
||
| 27 | // $a - --$b should not become $a---$b |
||
| 28 | '- --', |
||
| 29 | '+ ++' |
||
| 30 | ]; |
||
| 31 | |||
| 32 | /** |
||
| 33 | * @var bool Whether the source code needs to be reparsed before this stream is handed off to a |
||
| 34 | * new pass |
||
| 35 | */ |
||
| 36 | public $needsReparsing; |
||
| 37 | |||
| 38 | /** |
||
| 39 | * @var integer Current token index |
||
| 40 | */ |
||
| 41 | protected $offset; |
||
| 42 | |||
| 43 | /** |
||
| 44 | * @var bool Whether some tokens have been removed |
||
| 45 | */ |
||
| 46 | protected $sparse; |
||
| 47 | |||
| 48 | /** |
||
| 49 | * @var array<array|string> List of tokens |
||
| 50 | */ |
||
| 51 | protected $tokens; |
||
| 52 | |||
| 53 | /** |
||
| 54 | * Constructor |
||
| 55 | * |
||
| 56 | * @param string $src Original source code |
||
| 57 | */ |
||
| 58 | 26 | public function __construct($src) |
|
| 62 | |||
/**
* Test whether current token can be removed safely
*
* The decision is based on the nearest remaining token on each side of the current one:
*
*  - if either side has no token, removal is always considered safe
*  - a preceding single-line comment that is not terminated by its own newline blocks removal,
*    otherwise the next token would be absorbed into the comment
*  - pairs listed in $excludeExact block removal (e.g. "- -" must not become "--")
*  - otherwise removal is safe if at least one of the two touching characters is a delimiter
*
* @return bool
*/
public function canRemoveCurrentToken()
{
    $prevToken = $this->lookbehind();
    $nextToken = $this->lookahead();

    // lookbehind()/lookahead() return false when there is no neighbour on that side
    if ($prevToken === false || $nextToken === false)
    {
        return true;
    }

    if (is_array($prevToken))
    {
        if ($prevToken[0] === T_COMMENT)
        {
            $comment = $prevToken[1];

            // The newline of "//" comments is moved out of the comment token when the stream
            // is parsed, so whatever follows such a comment is what terminates it
            if (substr($comment, 0, 2) === '//')
            {
                return false;
            }

            // "#" comments keep their newline on PHP < 8 but not on PHP >= 8, where the
            // tokenizer puts it in the following whitespace. Only block removal when the
            // comment does not end with its own newline
            if ($comment[0] === '#' && substr($comment, -1) !== "\n")
            {
                return false;
            }
        }

        $prevToken = $prevToken[1];
    }
    if (is_array($nextToken))
    {
        $nextToken = $nextToken[1];
    }

    // Reject pairs that would form a different operator once glued together
    $str = $prevToken . ' ' . $nextToken;
    if (in_array($str, $this->excludeExact, true))
    {
        return false;
    }

    // Two tokens can touch safely if at least one of the touching characters is a delimiter
    $delimiters = "\t\n\r !\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~";
    $prevChar = substr($prevToken, -1);
    $nextChar = $nextToken[0];

    return (strpos($delimiters, $prevChar) !== false || strpos($delimiters, $nextChar) !== false);
}
||
| 104 | |||
| 105 | /** |
||
| 106 | * Test whether current token is the given type of token |
||
| 107 | * |
||
| 108 | * @param integer $tokenValue |
||
| 109 | * @return bool |
||
| 110 | */ |
||
| 111 | public function is($tokenValue) |
||
| 115 | |||
| 116 | /** |
||
| 117 | * Test whether current token is any of the given types of token |
||
| 118 | * |
||
| 119 | * @param integer[] $tokenValues |
||
| 120 | * @return bool |
||
| 121 | */ |
||
| 122 | public function isAny(array $tokenValues) |
||
| 126 | |||
| 127 | /** |
||
| 128 | * Test whether there's a token at given offset |
||
| 129 | * |
||
| 130 | * @param integer $offset |
||
| 131 | * @return bool |
||
| 132 | */ |
||
| 133 | 2 | public function offsetExists($offset) |
|
| 137 | |||
| 138 | /** |
||
| 139 | * Return the token stored at given offset |
||
| 140 | * |
||
| 141 | * @param integer $offset |
||
| 142 | * @return array|string |
||
| 143 | */ |
||
| 144 | 2 | public function offsetGet($offset) |
|
| 148 | |||
| 149 | /** |
||
| 150 | * Replace the token stored at given offset |
||
| 151 | * |
||
| 152 | * @param integer $offset |
||
| 153 | * @param array|string $token |
||
| 154 | * @return void |
||
| 155 | */ |
||
| 156 | 1 | public function offsetSet($offset, $token) |
|
| 160 | |||
| 161 | /** |
||
| 162 | * Remove the token stored at given offset |
||
| 163 | * |
||
| 164 | * @return void |
||
| 165 | */ |
||
| 166 | 1 | public function offsetUnset($offset) |
|
| 171 | |||
| 172 | /** |
||
| 173 | * Return the current token |
||
| 174 | * |
||
| 175 | * @return array|string |
||
| 176 | */ |
||
| 177 | 7 | public function current() |
|
| 181 | |||
| 182 | /** |
||
| 183 | * Get current token's text |
||
| 184 | * |
||
| 185 | * @return string |
||
| 186 | */ |
||
| 187 | public function currentText() |
||
| 191 | |||
| 192 | /** |
||
| 193 | * Test whether current token is noise (whitespace or comment) |
||
| 194 | * |
||
| 195 | * @return bool |
||
| 196 | */ |
||
| 197 | 9 | public function isNoise() |
|
| 201 | |||
| 202 | /** |
||
| 203 | * Return the offset of current token |
||
| 204 | * |
||
| 205 | * @return integer |
||
| 206 | */ |
||
| 207 | 11 | public function key() |
|
| 211 | |||
/**
* Peek at the next token
*
* Scans forward from the position right after the current offset, ignoring unset slots, without
* moving the stream.
*
* @return array|string|false The first token found, or FALSE if there is none
*/
public function lookahead()
{
    for ($idx = $this->offset + 1; $idx < $this->cnt; ++$idx)
    {
        if (isset($this->tokens[$idx]))
        {
            return $this->tokens[$idx];
        }
    }

    return false;
}
||
| 230 | |||
/**
* Peek at the previous token
*
* Scans backward from the position right before the current offset, ignoring unset slots,
* without moving the stream.
*
* @return array|string|false The first token found, or FALSE if there is none
*/
public function lookbehind()
{
    for ($idx = $this->offset - 1; $idx >= 0; --$idx)
    {
        if (isset($this->tokens[$idx]))
        {
            return $this->tokens[$idx];
        }
    }

    return false;
}
||
| 249 | |||
| 250 | /** |
||
| 251 | * Move to the next token in the stream |
||
| 252 | * |
||
| 253 | * @return void |
||
| 254 | */ |
||
| 255 | 7 | public function next() |
|
| 259 | |||
/**
* Normalize the whitespace at the end of single-line comments
*
* Will remove the newline at the end of "//" comments and put it either in the next token if it's
* a T_WHITESPACE or it will insert a T_WHITESPACE otherwise. Comments that do not end with a
* newline are left alone, so that nothing is added to the source. That is the case for every
* comment on PHP >= 8, where the tokenizer already keeps the newline out of T_COMMENT.
*
* @return void
*/
protected function normalizeSingleLineComments()
{
    // Offsets of the tokens that must receive the newline taken from the preceding comment
    $keys = [];
    foreach ($this->tokens as $k => $token)
    {
        // Same test as in canRemoveCurrentToken(): only comments that start with "//"
        if (!is_array($token) || $token[0] !== T_COMMENT || substr($token[1], 0, 2) !== '//')
        {
            continue;
        }

        $text = rtrim($token[1], "\n");
        if ($text === $token[1])
        {
            // Nothing was removed, there is no newline to relocate
            continue;
        }

        $this->tokens[$k][1] = $text;
        $keys[]              = $k + 1;
    }

    // Process the offsets in reverse order so that insertions don't shift the remaining ones
    foreach (array_reverse($keys) as $k)
    {
        if (isset($this->tokens[$k]) && $this->tokens[$k][0] === T_WHITESPACE)
        {
            $this->tokens[$k][1] = "\n" . $this->tokens[$k][1];
        }
        else
        {
            array_splice($this->tokens, $k, 0, [[T_WHITESPACE, "\n"]]);
        }
    }
}
|
| 293 | |||
/**
* Parse/tokenize given PHP source
*
* Replaces the content of this stream with the tokens of given source and resets the stream's
* state: offset at 0, not sparse, no reparsing needed.
*
* @param  string $src
* @return void
*/
protected function parse($src)
{
    $this->tokens = token_get_all($src);

    // Drop the third element of array tokens (the line number as per token_get_all()) so that
    // they are plain [tokenValue, string] pairs. The array is modified through its key rather
    // than through a by-reference loop variable, which would remain bound to the last token
    foreach ($this->tokens as $k => $token)
    {
        if (is_array($token))
        {
            unset($this->tokens[$k][2]);
        }
    }
    $this->normalizeSingleLineComments();

    // Must be computed after normalization, which may insert whitespace tokens
    $this->cnt            = count($this->tokens);
    $this->offset         = 0;
    $this->needsReparsing = false;
    $this->sparse         = false;
}
|
| 317 | |||
/**
* Move to the previous token in the stream
*
* Steps backward until an offset that holds a token is reached, or until offset 0.
*
* @return void
*/
public function previous()
{
    do
    {
        if ($this->offset <= 0)
        {
            return;
        }
        --$this->offset;
    }
    while (!isset($this->tokens[$this->offset]));
}
||
| 334 | |||
| 335 | /** |
||
| 336 | * Remove current token |
||
| 337 | * |
||
| 338 | * @return void |
||
| 339 | */ |
||
| 340 | public function remove() |
||
| 344 | |||
| 345 | /** |
||
| 346 | * Replace current token |
||
| 347 | * |
||
| 348 | * @param array|string $token Token replacement |
||
| 349 | * @return void |
||
| 350 | */ |
||
| 351 | public function replace($token) |
||
| 355 | |||
/**
* Reset this stream
*
* Moves the stream back to offset 0. If the stream is marked as needing to be reparsed, its
* tokens are serialized and parsed again. Otherwise, if the stream is marked as sparse, its
* tokens are reindexed and the token count and sparse flag are updated to match.
*
* @return void
*/
public function reset()
{
    $this->offset = 0;
    if ($this->needsReparsing)
    {
        // parse() takes care of resetting the count and both flags
        $this->parse($this->serialize());
    }
    elseif ($this->sparse)
    {
        $this->tokens = array_values($this->tokens);

        // The list is contiguous again: keep the upper bound used by the scanning methods in
        // sync with it and avoid reindexing again on the next reset
        $this->cnt    = count($this->tokens);
        $this->sparse = false;
    }
}
|
| 373 | |||
| 374 | /** |
||
| 375 | * Rewind/reset this stream |
||
| 376 | * |
||
| 377 | * @return void |
||
| 378 | */ |
||
| 379 | 1 | public function rewind() |
|
| 383 | |||
| 384 | /** |
||
| 385 | * Move the stream to given offset |
||
| 386 | * |
||
| 387 | * @param integer $offset |
||
| 388 | * @return void |
||
| 389 | */ |
||
| 390 | 8 | public function seek($offset) |
|
| 394 | |||
/**
* Serialize these tokens back to source code
*
* Concatenates the text of every token, in order. Array tokens contribute their second element,
* string tokens are used as-is.
*
* @return string
*/
public function serialize()
{
    $texts = array_map(
        static function ($token)
        {
            return (is_array($token)) ? $token[1] : $token;
        },
        $this->tokens
    );

    return implode('', $texts);
}
||
| 410 | |||
/**
* Skip all whitespace, comments and docblocks starting at current offset
*
* Advances the stream for as long as it is at a valid offset and isNoise() reports the current
* token as noise.
*
* @return void
*/
public function skipNoise()
{
    while ($this->valid() && $this->isNoise())
    {
        $this->next();
    }
}
|
| 427 | |||
/**
* Skip all whitespace starting at current offset
*
* Increments the offset until it points at something that is not a T_WHITESPACE token, or until
* the end of the stream.
*
* @return void
*/
public function skipWhitespace()
{
    for (; $this->offset < $this->cnt; ++$this->offset)
    {
        if ($this->tokens[$this->offset][0] !== T_WHITESPACE)
        {
            return;
        }
    }
}
|
| 444 | |||
/**
* Iterate through tokens until the stream reaches a token of given value or the end of stream
*
* The search starts at the token that follows the current one. The stream is left on the
* matching token if there is one, or past the last token otherwise.
*
* @param  integer $tokenValue The target value, e.g. T_ELSE
* @return bool                Whether a matching token was found
*/
public function skipTo($tokenValue)
{
    for (++$this->offset; $this->offset < $this->cnt; ++$this->offset)
    {
        if (!isset($this->tokens[$this->offset][0]))
        {
            continue;
        }
        if ($this->tokens[$this->offset][0] === $tokenValue)
        {
            return true;
        }
    }

    return false;
}
||
| 463 | |||
/**
* Iterate through tokens until the stream reaches given token or the end of stream
*
* The search starts at the token that follows the current one and uses a strict comparison.
* The stream is left on the matching token if there is one, or past the last token otherwise.
*
* @param  array|string $token The target token, either a string or a [tokenValue, string] pair
* @return bool                Whether a matching token was found
*/
public function skipToToken($token)
{
    for (++$this->offset; $this->offset < $this->cnt; ++$this->offset)
    {
        if (!isset($this->tokens[$this->offset]))
        {
            continue;
        }
        if ($this->tokens[$this->offset] === $token)
        {
            return true;
        }
    }

    return false;
}
||
| 482 | |||
| 483 | /** |
||
| 484 | * Test whether the stream is at a valid offset |
||
| 485 | * |
||
| 486 | * @return bool |
||
| 487 | */ |
||
| 488 | 8 | public function valid() |
|
| 492 | } |