Complex classes like Tokenizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Tokenizer and, based on these observations, to apply Extract Interface, too.
| 1 | <?php |
||
| 12 | class Tokenizer |
||
| 13 | { |
||
| 14 | protected $source; |
||
| 15 | protected $pos = 0; |
||
| 16 | protected $line = 1; |
||
| 17 | protected $lineStart = 0; |
||
| 18 | |||
| 19 | /** @var Token */ |
||
| 20 | protected $lookAhead; |
||
| 21 | |||
| 22 | 95 | protected function initTokenizer($source) |
|
| 27 | |||
| 28 | 95 | protected function next() |
|
| 37 | |||
| 38 | 95 | protected function skipWhitespace() |
|
| 39 | { |
||
| 40 | 95 | while ($this->pos < strlen($this->source)) { |
|
| 41 | 94 | $ch = $this->source[$this->pos]; |
|
| 42 | 94 | if ($ch === ' ' || $ch === "\t" || $ch === ',') { |
|
| 43 | 92 | $this->pos++; |
|
| 44 | 94 | } elseif ($ch === '#') { |
|
| 45 | 1 | $this->pos++; |
|
| 46 | while ( |
||
| 47 | 1 | $this->pos < strlen($this->source) && |
|
| 48 | 1 | ($code = ord($this->source[$this->pos])) && |
|
| 49 | 1 | $code !== 10 && $code !== 13 && $code !== 0x2028 && $code !== 0x2029 |
|
| 50 | 1 | ) { |
|
| 51 | 1 | $this->pos++; |
|
| 52 | 1 | } |
|
| 53 | 94 | } elseif ($ch === "\r") { |
|
| 54 | 1 | $this->pos++; |
|
| 55 | 1 | if ($this->source[$this->pos] === "\n") { |
|
| 56 | 1 | $this->pos++; |
|
| 57 | 1 | } |
|
| 58 | 1 | $this->line++; |
|
| 59 | 1 | $this->lineStart = $this->pos; |
|
| 60 | 94 | } elseif ($ch === "\n") { |
|
| 61 | 31 | $this->pos++; |
|
| 62 | 31 | $this->line++; |
|
| 63 | 31 | $this->lineStart = $this->pos; |
|
| 64 | 31 | } else { |
|
| 65 | 94 | break; |
|
| 66 | } |
||
| 67 | 92 | } |
|
| 68 | 95 | } |
|
| 69 | |||
| 70 | /** |
||
| 71 | * @return Token |
||
| 72 | * |
||
| 73 | * @throws SyntaxErrorException |
||
| 74 | */ |
||
| 75 | 95 | protected function scan() |
|
| 76 | { |
||
| 77 | 95 | if ($this->pos >= strlen($this->source)) { |
|
| 78 | 86 | return new Token(Token::TYPE_END, $this->getLine(), $this->getColumn()); |
|
| 79 | } |
||
| 80 | |||
| 81 | 94 | $ch = $this->source[$this->pos]; |
|
| 82 | switch ($ch) { |
||
| 83 | 94 | case Token::TYPE_LPAREN: |
|
| 84 | 55 | ++$this->pos; |
|
| 85 | |||
| 86 | 55 | return new Token(Token::TYPE_LPAREN, $this->getLine(), $this->getColumn()); |
|
| 87 | 94 | case Token::TYPE_RPAREN: |
|
| 88 | 48 | ++$this->pos; |
|
| 89 | |||
| 90 | 48 | return new Token(Token::TYPE_RPAREN, $this->getLine(), $this->getColumn()); |
|
| 91 | 94 | case Token::TYPE_LBRACE: |
|
| 92 | 93 | ++$this->pos; |
|
| 93 | |||
| 94 | 93 | return new Token(Token::TYPE_LBRACE, $this->getLine(), $this->getColumn()); |
|
| 95 | 94 | case Token::TYPE_RBRACE: |
|
| 96 | 86 | ++$this->pos; |
|
| 97 | |||
| 98 | 86 | return new Token(Token::TYPE_RBRACE, $this->getLine(), $this->getColumn()); |
|
| 99 | 93 | case Token::TYPE_COMMA: |
|
| 100 | ++$this->pos; |
||
| 101 | |||
| 102 | return new Token(Token::TYPE_COMMA, $this->getLine(), $this->getColumn()); |
||
| 103 | 93 | case Token::TYPE_LSQUARE_BRACE: |
|
| 104 | 11 | ++$this->pos; |
|
| 105 | |||
| 106 | 11 | return new Token(Token::TYPE_LSQUARE_BRACE, $this->getLine(), $this->getColumn()); |
|
| 107 | 93 | case Token::TYPE_RSQUARE_BRACE: |
|
| 108 | 10 | ++$this->pos; |
|
| 109 | |||
| 110 | 10 | return new Token(Token::TYPE_RSQUARE_BRACE, $this->getLine(), $this->getColumn()); |
|
| 111 | 93 | case Token::TYPE_REQUIRED: |
|
| 112 | 3 | ++$this->pos; |
|
| 113 | |||
| 114 | 3 | return new Token(Token::TYPE_REQUIRED, $this->getLine(), $this->getColumn()); |
|
| 115 | 93 | case Token::TYPE_COLON: |
|
| 116 | 59 | ++$this->pos; |
|
| 117 | |||
| 118 | 59 | return new Token(Token::TYPE_COLON, $this->getLine(), $this->getColumn()); |
|
| 119 | |||
| 120 | 93 | case Token::TYPE_POINT: |
|
|
|
|||
| 121 | 13 | if ($this->checkFragment()) { |
|
| 122 | 12 | return new Token(Token::TYPE_FRAGMENT_REFERENCE, $this->getLine(), $this->getColumn()); |
|
| 123 | } else { |
||
| 124 | 1 | return new Token(Token::TYPE_POINT, $this->getLine(), $this->getColumn()); |
|
| 125 | } |
||
| 126 | |||
| 127 | 93 | case Token::TYPE_VARIABLE: |
|
| 128 | 11 | ++$this->pos; |
|
| 129 | |||
| 130 | 11 | return new Token(Token::TYPE_VARIABLE, $this->getLine(), $this->getColumn()); |
|
| 131 | } |
||
| 132 | |||
| 133 | 93 | if ($ch === '_' || 'a' <= $ch && $ch <= 'z' || 'A' <= $ch && $ch <= 'Z') { |
|
| 134 | 93 | return $this->scanWord(); |
|
| 135 | } |
||
| 136 | |||
| 137 | 37 | if ($ch === '-' || '0' <= $ch && $ch <= '9') { |
|
| 138 | 20 | return $this->scanNumber(); |
|
| 139 | } |
||
| 140 | |||
| 141 | 23 | if ($ch === '"') { |
|
| 142 | 23 | return $this->scanString(); |
|
| 143 | } |
||
| 144 | |||
| 145 | 1 | throw $this->createException('Can\t recognize token type'); |
|
| 146 | } |
||
| 147 | |||
| 148 | 13 | protected function checkFragment() |
|
| 166 | |||
| 167 | 93 | protected function scanWord() |
|
| 186 | |||
| 187 | 93 | protected function getKeyword($name) |
|
| 214 | |||
| 215 | 92 | protected function expect($type) |
|
| 223 | |||
| 224 | 93 | protected function match($type) |
|
| 228 | |||
| 229 | 20 | protected function scanNumber() |
|
| 253 | |||
| 254 | 20 | protected function skipInteger() |
|
| 265 | |||
| 266 | 10 | protected function createException($message) |
|
| 270 | |||
| 271 | 12 | protected function getLocation() |
|
| 272 | { |
||
| 273 | 12 | return new Location($this->getLine(), $this->getColumn()); |
|
| 274 | } |
||
| 275 | |||
| 276 | 95 | protected function getColumn() |
|
| 280 | |||
| 281 | 95 | protected function getLine() |
|
| 285 | |||
| 286 | 23 | protected function scanString() |
|
| 306 | |||
| 307 | 95 | protected function end() |
|
| 311 | |||
| 312 | 94 | protected function peek() |
|
| 316 | |||
| 317 | 93 | protected function lex() |
|
| 324 | |||
| 325 | 5 | protected function createUnexpectedException(Token $token) |
|
| 329 | |||
| 330 | 9 | protected function createUnexpectedTokenTypeException($tokenType) |
|
| 334 | } |
||
| 335 |