Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.
Common duplication problems and their corresponding solutions are:
Complex classes like TokenStream often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use TokenStream, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
13 | class TokenStream implements ArrayAccess, Iterator |
||
14 | { |
||
15 | /** |
||
16 | * @var integer Number of tokens |
||
17 | */ |
||
18 | protected $cnt; |
||
19 | |||
20 | /** |
||
21 | * @var string[] List of exact triplet of tokens to exclude from minification |
||
22 | */ |
||
23 | protected $excludeExact = [ |
||
24 | // 1 - - 1 and 1 + + 1 should not become 1--1 or 1++1 |
||
25 | '- -', |
||
26 | '+ +', |
||
27 | // $a - --$b should not become $a---$b |
||
28 | '- --', |
||
29 | '+ ++' |
||
30 | ]; |
||
31 | |||
32 | /** |
||
33 | * @var bool Whether the source code needs to be reparsed before this stream is handed off to a |
||
34 | * new pass |
||
35 | */ |
||
36 | public $needsReparsing; |
||
37 | |||
38 | /** |
||
39 | * @var integer Current token index |
||
40 | */ |
||
41 | protected $offset; |
||
42 | |||
43 | /** |
||
44 | * @var bool Whether some tokens have been removed |
||
45 | */ |
||
46 | protected $sparse; |
||
47 | |||
48 | /** |
||
49 | * @var array<array|string> List of tokens |
||
50 | */ |
||
51 | protected $tokens; |
||
52 | |||
53 | /** |
||
54 | * Constructor |
||
55 | * |
||
56 | * @param string $src Original source code |
||
57 | */ |
||
58 | 26 | public function __construct($src) |
|
62 | |||
63 | /** |
||
64 | * Test whether current token can be removed safely |
||
65 | * |
||
66 | * @return bool |
||
67 | */ |
||
public function canRemoveCurrentToken()
{
	$prevToken = $this->lookbehind();
	$nextToken = $this->lookahead();

	// With no neighbour on one side, removal is reported as safe
	if ($prevToken === false || $nextToken === false)
	{
		return true;
	}

	if (is_array($prevToken))
	{
		if ($prevToken[0] === T_COMMENT)
		{
			$comment = $prevToken[1];

			// Never remove what directly follows a // comment
			if (substr($comment, 0, 2) === '//')
			{
				return false;
			}

			// A # comment that does not carry its own line break (which is how
			// token_get_all() reports them since PHP 8.0) relies on the next token to
			// terminate it, so that token must stay as well
			if (substr($comment, 0, 1) === '#' && rtrim($comment, "\r\n") === $comment)
			{
				return false;
			}
		}

		// From here on only the token's text matters
		$prevToken = $prevToken[1];
	}
	if (is_array($nextToken))
	{
		$nextToken = $nextToken[1];
	}

	// Some neighbours must keep a separator between them, see $excludeExact
	$str = $prevToken . ' ' . $nextToken;
	if (in_array($str, $this->excludeExact, true))
	{
		return false;
	}

	// Removal is allowed if either side of the gap is a delimiter character
	$delimiters = "\t\n\r !\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~";
	$prevChar = substr($prevToken, -1);
	$nextChar = $nextToken[0];

	return (strpos($delimiters, $prevChar) !== false || strpos($delimiters, $nextChar) !== false);
}
||
104 | |||
105 | /** |
||
106 | * Test whether current token is the given type of token |
||
107 | * |
||
108 | * @param integer $tokenValue |
||
109 | * @return bool |
||
110 | */ |
||
111 | public function is($tokenValue) |
||
115 | |||
116 | /** |
||
117 | * Test whether current token is any of the given types of token |
||
118 | * |
||
119 | * @param integer[] $tokenValues |
||
120 | * @return bool |
||
121 | */ |
||
122 | public function isAny(array $tokenValues) |
||
126 | |||
127 | /** |
||
128 | * Test whether there's a token at given offset |
||
129 | * |
||
130 | * @param integer $offset |
||
131 | * @return bool |
||
132 | */ |
||
133 | 2 | public function offsetExists($offset) |
|
137 | |||
138 | /** |
||
139 | * Return the token stored at given offset |
||
140 | * |
||
141 | * @param integer $offset |
||
142 | * @return array|string |
||
143 | */ |
||
144 | 2 | public function offsetGet($offset) |
|
148 | |||
149 | /** |
||
150 | * Replace the token stored at given offset |
||
151 | * |
||
152 | * @param integer $offset |
||
153 | * @param array|string $token |
||
154 | * @return void |
||
155 | */ |
||
156 | 1 | public function offsetSet($offset, $token) |
|
160 | |||
161 | /** |
||
162 | * Remove the token stored at given offset |
||
163 | * |
||
164 | * @return void |
||
165 | */ |
||
166 | 1 | public function offsetUnset($offset) |
|
171 | |||
172 | /** |
||
173 | * Return the current token |
||
174 | * |
||
175 | * @return array|string |
||
176 | */ |
||
177 | 7 | public function current() |
|
181 | |||
182 | /** |
||
183 | * Get current token's text |
||
184 | * |
||
185 | * @return string |
||
186 | */ |
||
187 | public function currentText() |
||
191 | |||
192 | /** |
||
193 | * Test whether current token is noise (whitespace or comment) |
||
194 | * |
||
195 | * @return bool |
||
196 | */ |
||
197 | 9 | public function isNoise() |
|
201 | |||
202 | /** |
||
203 | * Return the offset of current token |
||
204 | * |
||
205 | * @return integer |
||
206 | */ |
||
207 | 11 | public function key() |
|
211 | |||
212 | /** |
||
213 | * Peek at the next token |
||
214 | * |
||
215 | * @return array|string|false |
||
216 | */ |
||
public function lookahead()
{
	// Scan forward from the slot right after the current one, stepping over unset slots
	for ($pos = $this->offset + 1; $pos < $this->cnt; ++$pos)
	{
		if (isset($this->tokens[$pos]))
		{
			return $this->tokens[$pos];
		}
	}

	// No token was found before reaching $this->cnt
	return false;
}
||
230 | |||
231 | /** |
||
232 | * Peek at the previous token |
||
233 | * |
||
234 | * @return array|string|false |
||
235 | */ |
||
public function lookbehind()
{
	// Scan backward from the slot right before the current one, stepping over unset slots
	for ($pos = $this->offset - 1; $pos >= 0; --$pos)
	{
		if (isset($this->tokens[$pos]))
		{
			return $this->tokens[$pos];
		}
	}

	// No token was found before reaching the start of the list
	return false;
}
||
249 | |||
250 | /** |
||
251 | * Move to the next token in the stream |
||
252 | * |
||
253 | * @return void |
||
254 | */ |
||
255 | 7 | public function next() |
|
259 | |||
260 | /** |
||
261 | * Normalize the whitespace at the end of single-line comments |
||
262 | * |
||
263 | * Will remove the newline at the end of single-line comments and put it either in the next token |
||
264 | * if it's a T_WHITESPACE or it will insert a T_WHITESPACE otherwise. |
||
265 | * |
||
266 | * @return void |
||
267 | */ |
||
protected function normalizeSingleLineComments()
{
	// Offsets of the tokens that immediately follow a comment whose newline was stripped
	$keys = [];
	foreach ($this->tokens as $k => $token)
	{
		// Only // comments are normalized. Comparing the two-character prefix avoids reading
		// a string offset that does not exist on a one-character comment, and does not
		// mistake a "#/..." comment for a // comment
		if (!is_array($token) || $token[0] !== T_COMMENT || substr($token[1], 0, 2) !== '//')
		{
			continue;
		}

		$text = rtrim($token[1], "\n");
		if ($text === $token[1])
		{
			// The comment carries no newline, so there is nothing to move
			continue;
		}

		$this->tokens[$k][1] = $text;
		$keys[] = $k + 1;
	}

	// Work from the end so that inserting a token does not shift the offsets still to process
	foreach (array_reverse($keys) as $k)
	{
		if (isset($this->tokens[$k]) && $this->tokens[$k][0] === T_WHITESPACE)
		{
			// Hand the newline over to the whitespace that follows the comment
			$this->tokens[$k][1] = "\n" . $this->tokens[$k][1];
		}
		else
		{
			// No whitespace follows: insert a new whitespace token holding the newline
			array_splice($this->tokens, $k, 0, [[T_WHITESPACE, "\n"]]);
		}
	}
}
|
293 | |||
294 | /** |
||
295 | * Parse/tokenize given PHP source |
||
296 | * |
||
297 | * @param string $src |
||
298 | * @return void |
||
299 | */ |
||
protected function parse($src)
{
	$this->tokens = token_get_all($src);
	foreach ($this->tokens as &$token)
	{
		if (is_array($token))
		{
			// Drop the third element so that array tokens are plain [id, text] pairs
			unset($token[2]);
		}
	}
	// Break the reference, otherwise $token keeps aliasing the last element of the list
	unset($token);

	$this->normalizeSingleLineComments();

	// Reset the stream's bookkeeping to match the freshly parsed token list
	$this->cnt            = count($this->tokens);
	$this->offset         = 0;
	$this->needsReparsing = false;
	$this->sparse         = false;
}
|
317 | |||
318 | /** |
||
319 | * Move to the previous token in the stream |
||
320 | * |
||
321 | * @return void |
||
322 | */ |
||
public function previous()
{
	// Step back one slot at a time and stop on the first slot that still holds a token,
	// or at offset 0 if none does
	while ($this->offset > 0)
	{
		if (isset($this->tokens[--$this->offset]))
		{
			return;
		}
	}
}
||
334 | |||
335 | /** |
||
336 | * Remove current token |
||
337 | * |
||
338 | * @return void |
||
339 | */ |
||
340 | public function remove() |
||
344 | |||
345 | /** |
||
346 | * Replace current token |
||
347 | * |
||
348 | * @param array|string $token Token replacement |
||
349 | * @return void |
||
350 | */ |
||
351 | public function replace($token) |
||
355 | |||
356 | /** |
||
357 | * Reset this stream |
||
358 | * |
||
359 | * @return void |
||
360 | */ |
||
public function reset()
{
	$this->offset = 0;
	if ($this->needsReparsing)
	{
		// parse() rebuilds the token list and resets cnt, needsReparsing and sparse
		$this->parse($this->serialize());
	}
	elseif ($this->sparse)
	{
		// Close the gaps left by removed tokens...
		$this->tokens = array_values($this->tokens);

		// ...then bring the bookkeeping back in line with the re-indexed list, otherwise
		// offsets below the old count would be considered in range although they no
		// longer exist, and every later reset would re-index the list again
		$this->cnt    = count($this->tokens);
		$this->sparse = false;
	}
}
|
373 | |||
374 | /** |
||
375 | * Rewind/reset this stream |
||
376 | * |
||
377 | * @return void |
||
378 | */ |
||
379 | 1 | public function rewind() |
|
383 | |||
384 | /** |
||
385 | * Move the stream to given offset |
||
386 | * |
||
387 | * @param integer $offset |
||
388 | * @return void |
||
389 | */ |
||
390 | 8 | public function seek($offset) |
|
394 | |||
395 | /** |
||
396 | * Serialize these tokens back to source code |
||
397 | * |
||
398 | * @return string |
||
399 | */ |
||
public function serialize()
{
	// Collect the text of every token: array tokens keep it at index 1, string tokens
	// are their own text
	$chunks = [];
	foreach ($this->tokens as $token)
	{
		if (is_array($token))
		{
			$chunks[] = $token[1];
		}
		else
		{
			$chunks[] = $token;
		}
	}

	return implode('', $chunks);
}
||
410 | |||
411 | /** |
||
412 | * Skip all whitespace, comments and docblocks starting at current offset |
||
413 | * |
||
414 | * @return void |
||
415 | */ |
||
public function skipNoise()
{
	// Advance for as long as the stream is valid and sits on a noise token
	while ($this->valid() && $this->isNoise())
	{
		$this->next();
	}
}
|
427 | |||
428 | /** |
||
429 | * Skip all whitespace starting at current offset |
||
430 | * |
||
431 | * @return void |
||
432 | */ |
||
public function skipWhitespace()
{
	while ($this->offset < $this->cnt)
	{
		// Stop on the first existing token that is not whitespace. Slots emptied by a
		// removal are stepped over rather than dereferenced, so the stream neither reads
		// an undefined offset nor comes to rest on a hole
		if (isset($this->tokens[$this->offset]) && $this->tokens[$this->offset][0] !== T_WHITESPACE)
		{
			break;
		}
		++$this->offset;
	}
}
|
444 | |||
445 | /** |
||
446 | * Iterate through tokens until the stream reaches a token of given value or the end of stream |
||
447 | * |
||
448 | * @param integer $tokenValue The target value, e.g. T_ELSE |
||
449 | * @return bool Whether a matching token was found |
||
450 | */ |
||
public function skipTo($tokenValue)
{
	// Start at the slot after the current one; on a match the stream is left on that token
	for (++$this->offset; $this->offset < $this->cnt; ++$this->offset)
	{
		$pos = $this->offset;
		if (isset($this->tokens[$pos][0]) && $this->tokens[$pos][0] === $tokenValue)
		{
			return true;
		}
	}

	// The loop ran out of slots without finding a match
	return false;
}
||
463 | |||
464 | /** |
||
465 | * Iterate through tokens until the stream reaches given token or the end of stream |
||
466 | * |
||
467 | * @param array|string $token The target token, either a string or a [tokenValue, string] pair |
||
468 | * @return bool Whether a matching token was found |
||
469 | */ |
||
public function skipToToken($token)
{
	// Start at the slot after the current one; on a match the stream is left on that token
	for (++$this->offset; $this->offset < $this->cnt; ++$this->offset)
	{
		$pos = $this->offset;
		if (isset($this->tokens[$pos]) && $this->tokens[$pos] === $token)
		{
			return true;
		}
	}

	// The loop ran out of slots without finding a match
	return false;
}
||
482 | |||
483 | /** |
||
484 | * Test whether the stream is at a valid offset |
||
485 | * |
||
486 | * @return bool |
||
487 | */ |
||
488 | 8 | public function valid() |
|
492 | } |