Complex classes like Lexer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Lexer, and based on these observations, apply Extract Interface, too.
| 1 | <?php | ||
| 21 | class Lexer | ||
| 22 | { | ||
| 23 | private $language; | ||
| 24 | private $lines; | ||
| 25 | private $linesCount; | ||
| 26 | private $line; | ||
| 27 | private $trimmedLine; | ||
| 28 | private $lineNumber; | ||
| 29 | private $eos; | ||
| 30 | private $keywords; | ||
| 31 | private $keywordsCache = array(); | ||
| 32 | private $stepKeywordTypesCache = array(); | ||
| 33 | private $deferredObjects = array(); | ||
| 34 | private $deferredObjectsCount = 0; | ||
| 35 | private $stashedToken; | ||
| 36 | private $inPyString = false; | ||
| 37 | private $pyStringSwallow = 0; | ||
| 38 | private $featureStarted = false; | ||
| 39 | private $allowMultilineArguments = false; | ||
| 40 | private $allowSteps = false; | ||
| 41 | |||
| 42 | /** | ||
| 43 | * Initializes lexer. | ||
| 44 | * | ||
| 45 | * @param KeywordsInterface $keywords Keywords holder | ||
| 46 | */ | ||
| 47 | 54 | public function __construct(KeywordsInterface $keywords) | |
| 48 |     { | ||
| 49 | 54 | $this->keywords = $keywords; | |
| 50 | 54 | } | |
| 51 | |||
| 52 | /** | ||
| 53 | * Sets lexer input. | ||
| 54 | * | ||
| 55 | * @param string $input Input string | ||
| 56 | * @param string $language Language name | ||
| 57 | * | ||
| 58 | * @throws Exception\LexerException | ||
| 59 | */ | ||
| 60 | 52 | public function analyse($input, $language = 'en') | |
| 61 |     { | ||
| 62 | // try to detect unsupported encoding | ||
| 63 | 52 |         if ('UTF-8' !== mb_detect_encoding($input, 'UTF-8', true)) { | |
| 64 |             throw new LexerException('Feature file is not in UTF8 encoding'); | ||
| 65 | } | ||
| 66 | |||
| 67 | 52 |         $input = strtr($input, array("\r\n" => "\n", "\r" => "\n")); | |
| 68 | |||
| 69 | 52 |         $this->lines = explode("\n", $input); | |
| 70 | 52 | $this->linesCount = count($this->lines); | |
| 71 | 52 | $this->line = $this->lines[0]; | |
| 72 | 52 | $this->lineNumber = 1; | |
| 73 | 52 | $this->trimmedLine = null; | |
| 74 | 52 | $this->eos = false; | |
| 75 | |||
| 76 | 52 | $this->deferredObjects = array(); | |
| 77 | 52 | $this->deferredObjectsCount = 0; | |
| 78 | 52 | $this->stashedToken = null; | |
| 79 | 52 | $this->inPyString = false; | |
| 80 | 52 | $this->pyStringSwallow = 0; | |
| 81 | |||
| 82 | 52 | $this->featureStarted = false; | |
| 83 | 52 | $this->allowMultilineArguments = false; | |
| 84 | 52 | $this->allowSteps = false; | |
| 85 | |||
| 86 | 52 | $this->keywords->setLanguage($this->language = $language); | |
| 87 | 52 | $this->keywordsCache = array(); | |
| 88 | 52 | $this->stepKeywordTypesCache = array(); | |
| 89 | 52 | } | |
| 90 | |||
| 91 | /** | ||
| 92 | * Returns current lexer language. | ||
| 93 | * | ||
| 94 | * @return string | ||
| 95 | */ | ||
| 96 | 51 | public function getLanguage() | |
| 100 | |||
| 101 | /** | ||
| 102 | * Returns next token or previously stashed one. | ||
| 103 | * | ||
| 104 | * @return array | ||
| 105 | */ | ||
| 106 | 52 | public function getAdvancedToken() | |
| 110 | |||
| 111 | /** | ||
| 112 | * Defers token. | ||
| 113 | * | ||
| 114 | * @param array $token Token to defer | ||
| 115 | */ | ||
| 116 | public function deferToken(array $token) | ||
| 117 |     { | ||
| 118 | $token['deferred'] = true; | ||
| 119 | $this->deferredObjects[] = $token; | ||
| 120 | ++$this->deferredObjectsCount; | ||
| 121 | } | ||
| 122 | |||
| 123 | /** | ||
| 124 | * Returns the upcoming token and keeps it stashed, so repeated calls return the same token. | ||
| 125 | * | ||
| 126 | * @return array | ||
| 127 | */ | ||
| 128 | 52 | public function predictToken() | |
| 129 |     { | ||
| 130 | 52 |         if (null === $this->stashedToken) { | |
| 131 | 52 | $this->stashedToken = $this->getNextToken(); | |
| 132 | 52 | } | |
| 133 | |||
| 134 | 52 | return $this->stashedToken; | |
| 135 | } | ||
| 136 | |||
| 137 | /** | ||
| 138 | * Constructs token with specified parameters. | ||
| 139 | * | ||
| 140 | * @param string $type Token type | ||
| 141 | * @param string $value Token value | ||
| 142 | * | ||
| 143 | * @return array | ||
| 144 | */ | ||
| 145 | 52 | public function takeToken($type, $value = null) | |
| 146 |     { | ||
| 147 | return array( | ||
| 148 | 52 | 'type' => $type, | |
| 149 | 52 | 'line' => $this->lineNumber, | |
| 150 | 52 | 'value' => $value ?: null, | |
| 151 | 'deferred' => false | ||
| 152 | 52 | ); | |
| 153 | } | ||
| 154 | |||
| 155 | /** | ||
| 156 | * Consumes line from input & increments line counter. | ||
| 157 | */ | ||
| 158 | 52 | protected function consumeLine() | |
| 159 |     { | ||
| 160 | 52 | ++$this->lineNumber; | |
| 161 | |||
| 162 | 52 |         if (($this->lineNumber - 1) === $this->linesCount) { | |
| 163 | 49 | $this->eos = true; | |
| 164 | |||
| 165 | 49 | return; | |
| 166 | } | ||
| 167 | |||
| 168 | 52 | $this->line = $this->lines[$this->lineNumber - 1]; | |
| 169 | 52 | $this->trimmedLine = null; | |
| 170 | 52 | } | |
| 171 | |||
| 172 | /** | ||
| 173 | * Returns trimmed version of line. | ||
| 174 | * | ||
| 175 | * @return string | ||
| 176 | */ | ||
| 177 | 52 | protected function getTrimmedLine() | |
| 181 | |||
| 182 | /** | ||
| 183 | * Returns the stashed token and clears the stash, or returns null if nothing is stashed. | ||
| 184 | * | ||
| 185 | * @return array|null | ||
| 186 | */ | ||
| 187 | 52 | protected function getStashedToken() | |
| 188 |     { | ||
| 189 | 52 | $stashedToken = $this->stashedToken; | |
| 190 | 52 | $this->stashedToken = null; | |
| 191 | |||
| 192 | 52 | return $stashedToken; | |
| 193 | } | ||
| 194 | |||
| 195 | /** | ||
| 196 | * Returns the oldest deferred token, or null if there are none. | ||
| 197 | * | ||
| 198 | * @return array|null | ||
| 199 | */ | ||
| 200 | 52 | protected function getDeferredToken() | |
| 201 |     { | ||
| 202 | 52 |         if (!$this->deferredObjectsCount) { | |
| 203 | 52 | return null; | |
| 204 | } | ||
| 205 | |||
| 206 | --$this->deferredObjectsCount; | ||
| 207 | |||
| 208 | return array_shift($this->deferredObjects); | ||
| 209 | } | ||
| 210 | |||
| 211 | /** | ||
| 212 | * Returns next token from input. | ||
| 213 | * | ||
| 214 | * @return array | ||
| 215 | */ | ||
| 216 | 52 | protected function getNextToken() | |
| 217 |     { | ||
| 218 | 52 | return $this->getDeferredToken() | |
| 219 | 52 | ?: $this->scanEOS() | |
| 220 | 52 | ?: $this->scanLanguage() | |
| 221 | 52 | ?: $this->scanComment() | |
| 222 | 52 | ?: $this->scanPyStringOp() | |
| 223 | 52 | ?: $this->scanPyStringContent() | |
| 224 | 52 | ?: $this->scanStep() | |
| 225 | 52 | ?: $this->scanScenario() | |
| 226 | 52 | ?: $this->scanBackground() | |
| 227 | 52 | ?: $this->scanOutline() | |
| 228 | 52 | ?: $this->scanExamples() | |
| 229 | 52 | ?: $this->scanFeature() | |
| 230 | 51 | ?: $this->scanTags() | |
| 231 | 51 | ?: $this->scanTableRow() | |
| 232 | 51 | ?: $this->scanNewline() | |
| 233 | 52 | ?: $this->scanText(); | |
| 234 | } | ||
| 235 | |||
| 236 | /** | ||
| 237 | * Scans for token with specified regex. | ||
| 238 | * | ||
| 239 | * @param string $regex Regular expression | ||
| 240 | * @param string $type Expected token type | ||
| 241 | * | ||
| 242 | * @return null|array | ||
| 243 | */ | ||
| 244 | 11 | protected function scanInput($regex, $type) | |
| 245 |     { | ||
| 246 | 11 |         if (!preg_match($regex, $this->line, $matches)) { | |
| 247 | 4 | return null; | |
| 248 | } | ||
| 249 | |||
| 250 | 9 | $token = $this->takeToken($type, $matches[1]); | |
| 251 | 9 | $this->consumeLine(); | |
| 252 | |||
| 253 | 9 | return $token; | |
| 254 | } | ||
| 255 | |||
| 256 | /** | ||
| 257 | * Scans for token with specified keywords. | ||
| 258 | * | ||
| 259 | * @param string $keywords Keywords (split with |) | ||
| 260 | * @param string $type Expected token type | ||
| 261 | * | ||
| 262 | * @return null|array | ||
| 263 | */ | ||
| 264 | 52 | protected function scanInputForKeywords($keywords, $type) | |
| 265 |     { | ||
| 266 | 52 |         if (!preg_match('/^(\s*)(' . $keywords . '):\s*(.*)/u', $this->line, $matches)) { | |
| 267 | 52 | return null; | |
| 268 | } | ||
| 269 | |||
| 270 | 51 | $token = $this->takeToken($type, $matches[3]); | |
| 271 | 51 | $token['keyword'] = $matches[2]; | |
| 272 | 51 | $token['indent'] = mb_strlen($matches[1], 'utf8'); | |
| 273 | |||
| 274 | 51 | $this->consumeLine(); | |
| 275 | |||
| 276 | // turn off language searching | ||
| 277 | 51 |         if ('Feature' === $type) { | |
| 278 | 51 | $this->featureStarted = true; | |
| 279 | 51 | } | |
| 280 | |||
| 281 | // turn off PyString and Table searching | ||
| 282 | 51 |         if ('Feature' === $type || 'Scenario' === $type || 'Outline' === $type) { | |
| 283 | 51 | $this->allowMultilineArguments = false; | |
| 284 | 51 |         } elseif ('Examples' === $type) { | |
| 285 | 16 | $this->allowMultilineArguments = true; | |
| 286 | 16 | } | |
| 287 | |||
| 288 | // turn on steps searching | ||
| 289 | 51 |         if ('Scenario' === $type || 'Background' === $type || 'Outline' === $type) { | |
| 290 | 46 | $this->allowSteps = true; | |
| 291 | 46 | } | |
| 292 | |||
| 293 | 51 | return $token; | |
| 294 | } | ||
| 295 | |||
| 296 | /** | ||
| 297 | * Scans EOS from input & returns it if found. | ||
| 298 | * | ||
| 299 | * @return null|array | ||
| 300 | */ | ||
| 301 | 52 | protected function scanEOS() | |
| 302 |     { | ||
| 303 | 52 |         if (!$this->eos) { | |
| 304 | 52 | return null; | |
| 305 | } | ||
| 306 | |||
| 307 | 48 |         return $this->takeToken('EOS'); | |
| 308 | } | ||
| 309 | |||
| 310 | /** | ||
| 311 | * Returns keywords for provided type. | ||
| 312 | * | ||
| 313 | * @param string $type Keyword type | ||
| 314 | * | ||
| 315 | * @return string | ||
| 316 | */ | ||
| 317 | 52 | protected function getKeywords($type) | |
| 318 |     { | ||
| 319 | 52 |         if (!isset($this->keywordsCache[$type])) { | |
| 320 | 52 | $getter = 'get' . $type . 'Keywords'; | |
| 321 | 52 | $keywords = $this->keywords->$getter(); | |
| 322 | |||
| 323 | 52 |             if ('Step' === $type) { | |
| 324 | 43 | $padded = array(); | |
| 325 | 43 |                 foreach (explode('|', $keywords) as $keyword) { | |
| 326 | 43 | $padded[] = false !== mb_strpos($keyword, '<', 0, 'utf8') | |
| 327 | 43 | ? preg_quote(mb_substr($keyword, 0, -1, 'utf8'), '/') . '\s*' | |
| 328 | 43 | : preg_quote($keyword, '/') . '\s+'; | |
| 329 | 43 | } | |
| 330 | |||
| 331 | 43 |                 $keywords = implode('|', $padded); | |
| 332 | 43 | } | |
| 333 | |||
| 334 | 52 | $this->keywordsCache[$type] = $keywords; | |
| 335 | 52 | } | |
| 336 | |||
| 337 | 52 | return $this->keywordsCache[$type]; | |
| 338 | } | ||
| 339 | |||
| 340 | /** | ||
| 341 | * Scans Feature from input & returns it if found. | ||
| 342 | * | ||
| 343 | * @return null|array | ||
| 344 | */ | ||
| 345 | 52 | protected function scanFeature() | |
| 349 | |||
| 350 | /** | ||
| 351 | * Scans Background from input & returns it if found. | ||
| 352 | * | ||
| 353 | * @return null|array | ||
| 354 | */ | ||
| 355 | 52 | protected function scanBackground() | |
| 359 | |||
| 360 | /** | ||
| 361 | * Scans Scenario from input & returns it if found. | ||
| 362 | * | ||
| 363 | * @return null|array | ||
| 364 | */ | ||
| 365 | 52 | protected function scanScenario() | |
| 369 | |||
| 370 | /** | ||
| 371 | * Scans Scenario Outline from input & returns it if found. | ||
| 372 | * | ||
| 373 | * @return null|array | ||
| 374 | */ | ||
| 375 | 52 | protected function scanOutline() | |
| 379 | |||
| 380 | /** | ||
| 381 | * Scans Scenario Outline Examples from input & returns it if found. | ||
| 382 | * | ||
| 383 | * @return null|array | ||
| 384 | */ | ||
| 385 | 52 | protected function scanExamples() | |
| 389 | |||
| 390 | /** | ||
| 391 | * Scans Step from input & returns it if found. | ||
| 392 | * | ||
| 393 | * @return null|array | ||
| 394 | */ | ||
| 395 | 52 | protected function scanStep() | |
| 396 |     { | ||
| 397 | 52 |         if (!$this->allowSteps) { | |
| 398 | 52 | return null; | |
| 399 | } | ||
| 416 | |||
| 417 | /** | ||
| 418 | * Scans PyString from input & returns it if found. | ||
| 419 | * | ||
| 420 | * @return null|array | ||
| 421 | */ | ||
| 422 | 52 | protected function scanPyStringOp() | |
| 440 | |||
| 441 | /** | ||
| 442 | * Scans PyString content. | ||
| 443 | * | ||
| 444 | * @return null|array | ||
| 445 | */ | ||
| 446 | 52 | protected function scanPyStringContent() | |
| 458 | |||
| 459 | /** | ||
| 460 | * Scans Table Row from input & returns it if found. | ||
| 461 | * | ||
| 462 | * @return null|array | ||
| 463 | */ | ||
| 464 | 51 | protected function scanTableRow() | |
| 486 | |||
| 487 | /** | ||
| 488 | * Scans Tags from input & returns it if found. | ||
| 489 | * | ||
| 490 | * @return null|array | ||
| 491 | */ | ||
| 492 | 51 | protected function scanTags() | |
| 508 | |||
| 509 | /** | ||
| 510 | * Scans Language specifier from input & returns it if found. | ||
| 511 | * | ||
| 512 | * @return null|array | ||
| 513 | */ | ||
| 514 | 52 | protected function scanLanguage() | |
| 530 | |||
| 531 | /** | ||
| 532 | * Scans Comment from input & returns it if found. | ||
| 533 | * | ||
| 534 | * @return null|array | ||
| 535 | */ | ||
| 536 | 52 | protected function scanComment() | |
| 552 | |||
| 553 | /** | ||
| 554 | * Scans Newline from input & returns it if found. | ||
| 555 | * | ||
| 556 | * @return null|array | ||
| 557 | */ | ||
| 558 | 51 | protected function scanNewline() | |
| 569 | |||
| 570 | /** | ||
| 571 | * Scans text from input & returns it if found. | ||
| 572 | * | ||
| 573 | * @return null|array | ||
| 574 | */ | ||
| 575 | 36 | protected function scanText() | |
| 582 | |||
| 583 | /** | ||
| 584 | * Returns step type keyword (Given, When, Then, etc.). | ||
| 585 | * | ||
| 586 | * @param string $native Step keyword in provided language | ||
| 587 | * @return string | ||
| 588 | */ | ||
| 589 | 39 | private function getStepKeywordType($native) | |
| 614 | } | ||
| 615 |