Complex classes like Lexer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Lexer, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
21 | class Lexer |
||
22 | { |
||
23 | private $language; |
||
24 | private $lines; |
||
25 | private $linesCount; |
||
26 | private $line; |
||
27 | private $trimmedLine; |
||
28 | private $lineNumber; |
||
29 | private $eos; |
||
30 | private $keywords; |
||
31 | private $keywordsCache = array(); |
||
32 | private $stepKeywordTypesCache = array(); |
||
33 | private $deferredObjects = array(); |
||
34 | private $deferredObjectsCount = 0; |
||
35 | private $stashedToken; |
||
36 | private $inPyString = false; |
||
37 | private $pyStringSwallow = 0; |
||
38 | private $featureStarted = false; |
||
39 | private $allowMultilineArguments = false; |
||
40 | private $allowSteps = false; |
||
41 | |||
42 | /** |
||
43 | * Initializes lexer. |
||
44 | * |
||
45 | * @param KeywordsInterface $keywords Keywords holder |
||
46 | */ |
||
47 | 245 | public function __construct(KeywordsInterface $keywords) |
|
51 | |||
52 | /** |
||
53 | * Sets lexer input. |
||
54 | * |
||
55 | * @param string $input Input string |
||
56 | * @param string $language Language name |
||
57 | * |
||
58 | * @throws Exception\LexerException |
||
59 | */ |
||
60 | 243 | public function analyse($input, $language = 'en') |
|
90 | |||
91 | /** |
||
92 | * Returns current lexer language. |
||
93 | * |
||
94 | * @return string |
||
95 | */ |
||
96 | 242 | public function getLanguage() |
|
100 | |||
101 | /** |
||
102 | * Returns next token or previously stashed one. |
||
103 | * |
||
104 | * @return array |
||
105 | */ |
||
106 | 243 | public function getAdvancedToken() |
|
110 | |||
111 | /** |
||
112 | * Defers token. |
||
113 | * |
||
114 | * @param array $token Token to defer |
||
115 | */ |
||
116 | public function deferToken(array $token) |
||
122 | |||
123 | /** |
||
124 | * Predicts for number of tokens. |
||
125 | * |
||
126 | * @return array |
||
127 | */ |
||
128 | 243 | public function predictToken() |
|
136 | |||
137 | /** |
||
138 | * Constructs token with specified parameters. |
||
139 | * |
||
140 | * @param string $type Token type |
||
141 | * @param string $value Token value |
||
142 | * |
||
143 | * @return array |
||
144 | */ |
||
145 | 243 | public function takeToken($type, $value = null) |
|
154 | |||
155 | /** |
||
156 | * Consumes line from input & increments line counter. |
||
157 | */ |
||
158 | 243 | protected function consumeLine() |
|
171 | |||
172 | /** |
||
173 | * Returns trimmed version of line. |
||
174 | * |
||
175 | * @return string |
||
176 | */ |
||
177 | 243 | protected function getTrimmedLine() |
|
181 | |||
182 | /** |
||
183 | * Returns stashed token or null if there is none. |
||
184 | * |
||
185 | * @return array|null |
||
186 | */ |
||
187 | 243 | protected function getStashedToken() |
|
194 | |||
195 | /** |
||
196 | * Returns deferred token or null if there is none. |
||
197 | * |
||
198 | * @return array|null |
||
199 | */ |
||
200 | 243 | protected function getDeferredToken() |
|
210 | |||
211 | /** |
||
212 | * Returns next token from input. |
||
213 | * |
||
214 | * @return array |
||
215 | */ |
||
216 | 243 | protected function getNextToken() |
|
235 | |||
236 | /** |
||
237 | * Scans for token with specified regex. |
||
238 | * |
||
239 | * @param string $regex Regular expression |
||
240 | * @param string $type Expected token type |
||
241 | * |
||
242 | * @return null|array |
||
243 | */ |
||
244 | 195 | protected function scanInput($regex, $type) |
|
255 | |||
256 | /** |
||
257 | * Scans for token with specified keywords. |
||
258 | * |
||
259 | * @param string $keywords Keywords (separated by |) |
||
260 | * @param string $type Expected token type |
||
261 | * |
||
262 | * @return null|array |
||
263 | */ |
||
264 | 243 | protected function scanInputForKeywords($keywords, $type) |
|
295 | |||
296 | /** |
||
297 | * Scans EOS from input & returns it if found. |
||
298 | * |
||
299 | * @return null|array |
||
300 | */ |
||
301 | 243 | protected function scanEOS() |
|
309 | |||
310 | /** |
||
311 | * Returns keywords for provided type. |
||
312 | * |
||
313 | * @param string $type Keyword type |
||
314 | * |
||
315 | * @return string |
||
316 | */ |
||
317 | 243 | protected function getKeywords($type) |
|
339 | |||
340 | /** |
||
341 | * Scans Feature from input & returns it if found. |
||
342 | * |
||
343 | * @return null|array |
||
344 | */ |
||
345 | 243 | protected function scanFeature() |
|
349 | |||
350 | /** |
||
351 | * Scans Background from input & returns it if found. |
||
352 | * |
||
353 | * @return null|array |
||
354 | */ |
||
355 | 243 | protected function scanBackground() |
|
359 | |||
360 | /** |
||
361 | * Scans Scenario from input & returns it if found. |
||
362 | * |
||
363 | * @return null|array |
||
364 | */ |
||
365 | 243 | protected function scanScenario() |
|
369 | |||
370 | /** |
||
371 | * Scans Scenario Outline from input & returns it if found. |
||
372 | * |
||
373 | * @return null|array |
||
374 | */ |
||
375 | 243 | protected function scanOutline() |
|
379 | |||
380 | /** |
||
381 | * Scans Scenario Outline Examples from input & returns it if found. |
||
382 | * |
||
383 | * @return null|array |
||
384 | */ |
||
385 | 243 | protected function scanExamples() |
|
389 | |||
390 | /** |
||
391 | * Scans Step from input & returns it if found. |
||
392 | * |
||
393 | * @return null|array |
||
394 | */ |
||
395 | 243 | protected function scanStep() |
|
416 | |||
417 | /** |
||
418 | * Scans PyString from input & returns it if found. |
||
419 | * |
||
420 | * @return null|array |
||
421 | */ |
||
422 | 243 | protected function scanPyStringOp() |
|
440 | |||
441 | /** |
||
442 | * Scans PyString content. |
||
443 | * |
||
444 | * @return null|array |
||
445 | */ |
||
446 | 243 | protected function scanPyStringContent() |
|
458 | |||
459 | /** |
||
460 | * Scans Table Row from input & returns it if found. |
||
461 | * |
||
462 | * @return null|array |
||
463 | */ |
||
464 | 240 | protected function scanTableRow() |
|
465 | { |
||
466 | 240 | if (!$this->allowMultilineArguments) { |
|
467 | 234 | return null; |
|
468 | } |
||
469 | |||
470 | 225 | $line = $this->getTrimmedLine(); |
|
471 | 225 | if (!isset($line[0]) || '|' !== $line[0] || '|' !== substr($line, -1)) { |
|
472 | 225 | return null; |
|
473 | } |
||
474 | |||
475 | 206 | $token = $this->takeToken('TableRow'); |
|
476 | 206 | $line = mb_substr($line, 1, mb_strlen($line, 'utf8') - 2, 'utf8'); |
|
477 | $columns = array_map(function ($column) { |
||
478 | 206 | return trim(str_replace('\\|', '|', $column)); |
|
479 | 206 | }, preg_split('/(?<!\\\)\|/u', $line)); |
|
480 | 206 | $token['columns'] = $columns; |
|
481 | |||
482 | 206 | $this->consumeLine(); |
|
483 | |||
484 | 206 | return $token; |
|
485 | } |
||
486 | |||
487 | /** |
||
488 | * Scans Tags from input & returns it if found. |
||
489 | * |
||
490 | * @return null|array |
||
491 | */ |
||
492 | 241 | protected function scanTags() |
|
508 | |||
509 | /** |
||
510 | * Scans Language specifier from input & returns it if found. |
||
511 | * |
||
512 | * @return null|array |
||
513 | */ |
||
514 | 243 | protected function scanLanguage() |
|
530 | |||
531 | /** |
||
532 | * Scans Comment from input & returns it if found. |
||
533 | * |
||
534 | * @return null|array |
||
535 | */ |
||
536 | 243 | protected function scanComment() |
|
552 | |||
553 | /** |
||
554 | * Scans Newline from input & returns it if found. |
||
555 | * |
||
556 | * @return null|array |
||
557 | */ |
||
558 | 240 | protected function scanNewline() |
|
569 | |||
570 | /** |
||
571 | * Scans text from input & returns it if found. |
||
572 | * |
||
573 | * @return null|array |
||
574 | */ |
||
575 | 224 | protected function scanText() |
|
582 | |||
583 | /** |
||
584 | * Returns step type keyword (Given, When, Then, etc.). |
||
585 | * |
||
586 | * @param string $native Step keyword in provided language |
||
587 | * @return string |
||
588 | */ |
||
589 | 230 | private function getStepKeywordType($native) |
|
614 | } |
||
615 |