Complex classes like Tokenizer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Tokenizer, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 10 | class Tokenizer |
||
| 11 | { |
||
| 12 | protected $source; |
||
| 13 | protected $pos = 0; |
||
| 14 | protected $line = 1; |
||
| 15 | protected $lineStart = 0; |
||
| 16 | |||
| 17 | /** @var Token */ |
||
| 18 | protected $lookAhead; |
||
| 19 | |||
| 20 | 9 | public function setSource($source) |
|
| 25 | |||
| 26 | 9 | protected function next() |
|
| 39 | |||
/**
 * Advances the cursor past spaces, tabs and line breaks.
 *
 * Each line break ("\n", "\r" or the pair "\r\n") increments $line and
 * stores the offset of the character following it in $lineStart. Scanning
 * stops at the first other character or at the end of $source.
 */
protected function skipWhitespace()
{
    // $source is not modified here, so its length can be computed once.
    $length = strlen($this->source);

    while ($this->pos < $length) {
        $ch = $this->source[$this->pos];

        if ($ch === ' ' || $ch === "\t") {
            $this->pos++;
        } elseif ($ch === "\r") {
            $this->pos++;

            // Treat "\r\n" as one line break. The bounds check prevents an
            // out-of-range string offset when "\r" is the last character.
            if ($this->pos < $length && $this->source[$this->pos] === "\n") {
                $this->pos++;
            }

            $this->line++;
            $this->lineStart = $this->pos;
        } elseif ($ch === "\n") {
            $this->pos++;
            $this->line++;
            $this->lineStart = $this->pos;
        } else {
            break;
        }
    }
}
| 62 | |||
| 63 | /** |
||
| 64 | * @return Token |
||
| 65 | */ |
||
| 66 | 9 | protected function scan() |
|
| 141 | |||
| 142 | protected function checkFragment() |
||
| 160 | |||
/**
 * Scans a word starting at the current position.
 *
 * The character under the cursor is consumed unconditionally; after that
 * the cursor keeps advancing while it sees "_", "$", an ASCII letter or an
 * ASCII digit.
 *
 * @return Token token whose type comes from getKeyword() and whose value
 *               is the scanned text
 */
protected function scanWord()
{
    // Remember where the word begins, then step over its first character.
    $begin = $this->pos++;
    $length = strlen($this->source);

    while ($this->pos < $length) {
        $c = $this->source[$this->pos];

        $isWordChar = $c === '_'
            || $c === '$'
            || ('a' <= $c && $c <= 'z')
            || ('A' <= $c && $c <= 'Z')
            || ('0' <= $c && $c <= '9');

        if (!$isWordChar) {
            break;
        }

        $this->pos++;
    }

    $word = substr($this->source, $begin, $this->pos - $begin);

    return new Token($this->getKeyword($word), $word);
}
| 180 | |||
/**
 * Maps a scanned word to its token type.
 *
 * Reserved words map to their dedicated Token::TYPE_* constant; anything
 * else is reported as Token::TYPE_IDENTIFIER.
 *
 * The lookup is strict: a switch statement compares loosely, which would
 * let non-string input (for example boolean true) match the first case.
 *
 * @param string $name the scanned word
 *
 * @return mixed one of the Token::TYPE_* constants
 */
protected function getKeyword($name)
{
    $keywords = [
        'null'     => Token::TYPE_NULL,
        'true'     => Token::TYPE_TRUE,
        'false'    => Token::TYPE_FALSE,
        'query'    => Token::TYPE_QUERY,
        'fragment' => Token::TYPE_FRAGMENT,
        'mutation' => Token::TYPE_MUTATION,
        'on'       => Token::TYPE_ON,
        'as'       => Token::TYPE_AS,
    ];

    // array_key_exists (not isset) so a constant whose value is null still counts.
    if (is_string($name) && array_key_exists($name, $keywords)) {
        return $keywords[$name];
    }

    return Token::TYPE_IDENTIFIER;
}
| 211 | |||
/**
 * Scans a numeric literal starting at the current position.
 *
 * Accepted shape: optional "-", one or more digits, optional "." followed
 * by digits, optional exponent ("e" or "E", optional sign, digits).
 * skipInteger() raises the exception built by createIllegal() whenever a
 * required digit run is missing.
 *
 * @return Token a Token::TYPE_NUMBER token whose value is the literal cast
 *               to float
 */
protected function scanNumber()
{
    $start = $this->pos;
    // Every character read below is bounds-checked against this length so
    // that a number ending the input does not read past the end of $source.
    $length = strlen($this->source);

    if ($this->pos < $length && $this->source[$this->pos] === '-') {
        $this->pos++;
    }

    $this->skipInteger();

    // Fractional part. (A single character can never equal the two-character
    // string '->', so that former comparison was dead code and is removed.)
    if ($this->pos < $length && $this->source[$this->pos] === '.') {
        $this->pos++;
        $this->skipInteger();
    }

    // Exponent part.
    if ($this->pos < $length) {
        $ch = $this->source[$this->pos];

        if ($ch === 'e' || $ch === 'E') {
            $this->pos++;

            if ($this->pos < $length) {
                $ch = $this->source[$this->pos];
                if ($ch === '+' || $ch === '-') {
                    $this->pos++;
                }
            }

            $this->skipInteger();
        }
    }

    // substr() takes a length, not an end offset.
    $value = (float) substr($this->source, $start, $this->pos - $start);

    return new Token(Token::TYPE_NUMBER, $value);
}
| 243 | |||
/**
 * Advances the cursor over a run of ASCII digits.
 *
 * At least one digit is required: if the cursor did not move, the
 * exception built by createIllegal() is thrown.
 */
protected function skipInteger()
{
    $begin = $this->pos;
    $length = strlen($this->source);

    while ($this->pos < $length) {
        $digit = $this->source[$this->pos];

        if (!('0' <= $digit && $digit <= '9')) {
            break;
        }

        $this->pos++;
    }

    // No progress means there was no digit under the cursor.
    if ($this->pos === $begin) {
        throw $this->createIllegal();
    }
}
| 261 | |||
| 262 | protected function createIllegal() |
||
| 268 | |||
| 269 | protected function createError($message) |
||
| 273 | |||
| 274 | protected function getColumn() |
||
| 278 | |||
/**
 * Scans a double-quoted string literal.
 *
 * The character under the cursor (the opening quote) is skipped, then
 * characters are collected verbatim until the closing quote, which is
 * consumed as well. A line break or the end of input before the closing
 * quote raises the exception built by createIllegal(), with the cursor
 * left on the offending position.
 *
 * @return Token a Token::TYPE_STRING token holding the text between the quotes
 */
protected function scanString()
{
    // Step over the opening quote.
    $this->pos++;

    $text = '';
    $length = strlen($this->source);

    for (; $this->pos < $length; $this->pos++) {
        $c = $this->source[$this->pos];

        if ($c === '"') {
            // Consume the closing quote before handing the token back.
            $this->pos++;

            return new Token(Token::TYPE_STRING, $text);
        }

        if ($c === "\r" || $c === "\n") {
            // Strings may not span lines; leave the cursor on the line break.
            break;
        }

        $text .= $c;
    }

    throw $this->createIllegal();
}
| 302 | |||
| 303 | 9 | protected function end() |
|
| 307 | |||
| 308 | protected function peek() |
||
| 312 | |||
| 313 | 9 | protected function lex() |
|
| 320 | |||
| 321 | protected function createUnexpected(Token $token) |
||
| 322 | { |
||
| 323 | switch ($token->getType()) { |
||
| 324 | case Token::TYPE_END: |
||
| 325 | return $this->createError('Unexpected end of input'); |
||
| 326 | case Token::TYPE_NUMBER: |
||
| 336 | } |
An attempt at access to an undefined property has been detected. This may either be a typographical error or the property has been renamed but there are still references to its old name.
If you really want to allow access to undefined properties, you can define magic methods to allow access. See the php core documentation on Overloading.