Complex classes like Tokenizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Tokenizer, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
12 | class Tokenizer |
||
13 | { |
||
14 | protected $source; |
||
15 | protected $pos = 0; |
||
16 | protected $line = 1; |
||
17 | protected $lineStart = 0; |
||
18 | |||
19 | /** @var Token */ |
||
20 | protected $lookAhead; |
||
21 | |||
22 | 108 | protected function initTokenizer($source) |
|
27 | |||
/**
 * Produces the next token: first skips everything skipWhitespace() treats
 * as ignorable, then hands over to scan() to read the token itself.
 *
 * @return Token
 *
 * @throws SyntaxErrorException propagated from scan()
 */
protected function next()
{
    $this->skipWhitespace();
    $token = $this->scan();

    return $token;
}
||
37 | |||
/**
 * Moves the cursor past ignorable input: spaces, tabs, commas, line
 * terminators and "#" comments.
 *
 * Every line terminator ("\n", "\r" or "\r\n") increments $this->line and
 * records the offset of the following character in $this->lineStart.
 * Scanning stops at the first character that is none of the above, or at
 * the end of the source.
 */
protected function skipWhitespace()
{
    // The source is not modified while scanning, so measure it once.
    $length = strlen($this->source);

    while ($this->pos < $length) {
        $ch = $this->source[$this->pos];

        if ($ch === ' ' || $ch === "\t" || $ch === ',') {
            $this->pos++;
        } elseif ($ch === '#') {
            $this->pos++;

            // A comment extends up to (but not including) the next LF or CR;
            // a NUL byte also ends it. ord() only ever yields 0-255, so
            // comparing against U+2028 / U+2029 here could never match.
            while ($this->pos < $length) {
                $code = ord($this->source[$this->pos]);
                if ($code === 0 || $code === 10 || $code === 13) {
                    break;
                }
                $this->pos++;
            }
        } elseif ($ch === "\r") {
            $this->pos++;

            // Treat "\r\n" as a single line break. The bounds check matters
            // when "\r" is the very last byte of the source.
            if ($this->pos < $length && $this->source[$this->pos] === "\n") {
                $this->pos++;
            }

            $this->line++;
            $this->lineStart = $this->pos;
        } elseif ($ch === "\n") {
            $this->pos++;
            $this->line++;
            $this->lineStart = $this->pos;
        } else {
            break;
        }
    }
}
|
69 | |||
/**
 * Reads one token starting at the current position.
 *
 * Returns a TYPE_END token when the cursor is at or past the end of the
 * source. Otherwise the current character decides what is read: a
 * single-character punctuator, a fragment reference / point, a word, a
 * number or a string. The caller is expected to have skipped ignorable
 * characters first (see next()).
 *
 * @return Token
 *
 * @throws SyntaxErrorException when the current character starts no known token
 */
protected function scan()
{
    if ($this->pos >= strlen($this->source)) {
        return new Token(Token::TYPE_END, $this->getLine(), $this->getColumn());
    }

    $ch = $this->source[$this->pos];

    // Punctuators that consist of exactly one character. The order matches
    // the original switch so the first matching type still wins.
    $punctuators = [
        Token::TYPE_LPAREN,
        Token::TYPE_RPAREN,
        Token::TYPE_LBRACE,
        Token::TYPE_RBRACE,
        Token::TYPE_COMMA,
        Token::TYPE_LSQUARE_BRACE,
        Token::TYPE_RSQUARE_BRACE,
        Token::TYPE_REQUIRED,
        Token::TYPE_AT,
        Token::TYPE_COLON,
        Token::TYPE_EQUAL,
    ];

    foreach ($punctuators as $type) {
        // Loose comparison on purpose: it is what `switch` used before.
        if ($ch == $type) {
            ++$this->pos;

            return new Token($type, $this->getLine(), $this->getColumn());
        }
    }

    if ($ch == Token::TYPE_POINT) {
        // The cursor is not advanced here; any advancing is left to checkFragment().
        if ($this->checkFragment()) {
            return new Token(Token::TYPE_FRAGMENT_REFERENCE, $this->getLine(), $this->getColumn());
        }

        return new Token(Token::TYPE_POINT, $this->getLine(), $this->getColumn());
    }

    if ($ch == Token::TYPE_VARIABLE) {
        ++$this->pos;

        return new Token(Token::TYPE_VARIABLE, $this->getLine(), $this->getColumn());
    }

    if ($ch === '_' || ('a' <= $ch && $ch <= 'z') || ('A' <= $ch && $ch <= 'Z')) {
        return $this->scanWord();
    }

    if ($ch === '-' || ('0' <= $ch && $ch <= '9')) {
        return $this->scanNumber();
    }

    if ($ch === '"') {
        return $this->scanString();
    }

    // The apostrophe must be escaped as \' in a single-quoted literal;
    // the previous '\t' produced a literal backslash followed by "t".
    throw $this->createException('Can\'t recognize token type');
}
||
157 | |||
158 | 17 | protected function checkFragment() |
|
176 | |||
/**
 * Reads a word: the character under the cursor plus every following
 * character that is an ASCII letter, an ASCII digit, "_" or "$".
 *
 * The first character is consumed without being checked. The token type
 * comes from getKeyword(); the raw text is attached as the token value.
 *
 * @return Token
 */
protected function scanWord()
{
    $wordChars = '_$abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';

    $begin  = $this->pos;
    $length = 1 + (int) strspn($this->source, $wordChars, $begin + 1);

    $this->pos = $begin + $length;
    $word      = substr($this->source, $begin, $length);

    return new Token($this->getKeyword($word), $this->getLine(), $this->getColumn(), $word);
}
||
196 | |||
197 | 105 | protected function getKeyword($name) |
|
224 | |||
225 | 104 | protected function expect($type) |
|
233 | |||
234 | 105 | protected function match($type) |
|
238 | |||
/**
 * Reads a numeric literal: an optional leading "-", a run of digits and an
 * optional "." followed by another run of digits.
 *
 * The token value is an int when no "." was consumed and a float
 * otherwise.
 *
 * @return Token
 */
protected function scanNumber()
{
    $begin = $this->pos;

    if ($this->source[$this->pos] === '-') {
        ++$this->pos;
    }
    $this->skipInteger();

    // A "." directly after the integer part starts the fractional part.
    $hasFraction = isset($this->source[$this->pos]) && $this->source[$this->pos] === '.';
    if ($hasFraction) {
        $this->pos++;
        $this->skipInteger();
    }

    $literal = substr($this->source, $begin, $this->pos - $begin);
    $number  = $hasFraction ? (float) $literal : (int) $literal;

    return new Token(Token::TYPE_NUMBER, $this->getLine(), $this->getColumn(), $number);
}
||
263 | |||
/**
 * Advances the cursor over a run of ASCII digits ("0"-"9"), stopping at
 * the first other character or at the end of the source.
 */
protected function skipInteger()
{
    $digits = (int) strspn($this->source, '0123456789', $this->pos);

    $this->pos += $digits;
}
|
275 | |||
276 | 10 | protected function createException($message) |
|
280 | |||
/**
 * Builds a Location from the values currently reported by getLine() and
 * getColumn().
 *
 * @return Location
 */
protected function getLocation()
{
    $line   = $this->getLine();
    $column = $this->getColumn();

    return new Location($line, $column);
}
||
285 | |||
286 | 108 | protected function getColumn() |
|
290 | |||
291 | 108 | protected function getLine() |
|
295 | |||
/**
 * Reads a double-quoted string literal and decodes its escape sequences.
 *
 * The cursor must be on the opening quote. On success the cursor ends up
 * just past the closing quote and the decoded text is the token value.
 * Supported escapes: \" \\ \/ \b \f \n \r \t and \uXXXX (four hex digits).
 *
 * @see http://facebook.github.io/graphql/October2016/#sec-String-Value
 *
 * @return Token
 *
 * Throws whatever createException() builds for an invalid escape, and
 * whatever createUnexpectedTokenTypeException() builds when the source
 * ends before the closing quote (presumably SyntaxErrorException, as in
 * scan() - confirm against those helpers).
 */
protected function scanString()
{
    $len = strlen($this->source);
    $this->pos++; // step over the opening quote

    $value = '';
    while ($this->pos < $len) {
        $ch = $this->source[$this->pos];

        if ($ch === '"') {
            // The token is created before the closing quote is consumed,
            // so its line/column are taken with the cursor on that quote.
            $token = new Token(Token::TYPE_STRING, $this->getLine(), $this->getColumn(), $value);
            $this->pos++;

            return $token;
        }

        // A backslash that is the very last byte is not treated as an
        // escape; it is appended as-is and the loop then runs off the end.
        if ($ch === '\\' && ($this->pos < ($len - 1))) {
            $this->pos++;
            $ch = $this->source[$this->pos];

            switch ($ch) {
                case '"':
                case '\\':
                case '/':
                    // These stand for themselves.
                    break;
                case 'b':
                    $ch = sprintf("%c", 8); // backspace
                    break;
                case 'f':
                    $ch = "\f";
                    break;
                case 'n':
                    $ch = "\n";
                    break;
                case 'r':
                    $ch = "\r";
                    break;
                case 't':
                    // Part of the String Value grammar; previously rejected.
                    $ch = "\t";
                    break;
                case 'u':
                    $codepoint = substr($this->source, $this->pos + 1, 4);
                    if (!preg_match('/[0-9A-Fa-f]{4}/', $codepoint)) {
                        throw $this->createException(sprintf('Invalid string unicode escape sequece "%s"', $codepoint));
                    }
                    // Decode the code point by way of a numeric HTML entity.
                    $ch = html_entity_decode("&#x{$codepoint};", ENT_QUOTES, 'UTF-8');
                    $this->pos += 4;
                    break;
                default:
                    throw $this->createException(sprintf('Unexpected string escaped character "%s"', $ch));
            }
        }

        $value .= $ch;
        $this->pos++;
    }

    // Ran out of input without seeing the closing quote.
    throw $this->createUnexpectedTokenTypeException(Token::TYPE_END);
}
||
355 | |||
356 | 107 | protected function end() |
|
360 | |||
361 | 106 | protected function peek() |
|
365 | |||
366 | 105 | protected function lex() |
|
373 | |||
374 | 5 | protected function createUnexpectedException(Token $token) |
|
378 | |||
379 | 9 | protected function createUnexpectedTokenTypeException($tokenType) |
|
383 | } |
||
384 |
This check looks for unreachable code. It uses sophisticated control flow analysis techniques to find statements which will never be executed.
Unreachable code is most often the result of `return`, `die` or `exit` statements that have been added for debug purposes.

    function fx() {
        try {
            doSomething();
            return true;
        } catch (\Exception $e) {
            return false;
        }

        return false;
    }

In the above example, the last `return false` will never be executed, because a return statement has already been met in every possible execution path.