Complex classes like Lexer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Lexer, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
21 | class Lexer |
||
22 | { |
||
23 | private $language; |
||
24 | private $lines; |
||
25 | private $linesCount; |
||
26 | private $line; |
||
27 | private $trimmedLine; |
||
28 | private $lineNumber; |
||
29 | private $eos; |
||
30 | private $keywords; |
||
31 | private $keywordsCache = array(); |
||
32 | private $stepKeywordTypesCache = array(); |
||
33 | private $deferredObjects = array(); |
||
34 | private $deferredObjectsCount = 0; |
||
35 | private $stashedToken; |
||
36 | private $inPyString = false; |
||
37 | private $pyStringSwallow = 0; |
||
38 | private $featureStarted = false; |
||
39 | private $allowMultilineArguments = false; |
||
40 | private $allowSteps = false; |
||
41 | |||
/**
 * Creates a lexer bound to the given keywords holder.
 *
 * @param KeywordsInterface $keywords Keywords holder stored for later keyword lookups
 */
public function __construct(KeywordsInterface $keywords)
{
    $this->keywords = $keywords;
}
|
51 | |||
/**
 * Loads a new input into the lexer and resets all scanning state.
 *
 * Line endings ("\r\n" and "\r") are normalised to "\n" before the input
 * is split into lines.
 *
 * @param string $input    Input string
 * @param string $language Language name
 *
 * @throws Exception\LexerException When the input is not detected as UTF-8
 */
public function analyse($input, $language = 'en')
{
    // try to detect unsupported encoding
    $detectedEncoding = mb_detect_encoding($input, 'UTF-8', true);
    if ('UTF-8' !== $detectedEncoding) {
        throw new LexerException('Feature file is not in UTF8 encoding');
    }

    // unify line endings, then split the input into separate lines
    $normalised = strtr($input, array("\r\n" => "\n", "\r" => "\n"));
    $lines = explode("\n", $normalised);

    // position the lexer on the first line
    $this->lines = $lines;
    $this->linesCount = count($lines);
    $this->line = $lines[0];
    $this->lineNumber = 1;
    $this->trimmedLine = null;
    $this->eos = false;

    // drop any tokens left over from a previous run
    $this->deferredObjects = array();
    $this->deferredObjectsCount = 0;
    $this->stashedToken = null;
    $this->inPyString = false;
    $this->pyStringSwallow = 0;

    // reset the flags that control which scanners are active
    $this->featureStarted = false;
    $this->allowMultilineArguments = false;
    $this->allowSteps = false;

    // switch the keywords holder to the requested language and clear caches
    $this->language = $language;
    $this->keywords->setLanguage($language);
    $this->keywordsCache = array();
    $this->stepKeywordTypesCache = array();
}
|
90 | |||
91 | /** |
||
92 | * Returns current lexer language. |
||
93 | * |
||
94 | * @return string |
||
95 | */ |
||
96 | 51 | public function getLanguage() |
|
100 | |||
101 | /** |
||
102 | * Returns next token or previously stashed one. |
||
103 | * |
||
104 | * @return array |
||
105 | */ |
||
106 | 52 | public function getAdvancedToken() |
|
110 | |||
/**
 * Queues a token so it can be handed out later, marking it as deferred.
 *
 * @param array $token Token to defer
 */
public function deferToken(array $token)
{
    // array_replace keeps every existing key and sets/overwrites 'deferred'
    $this->deferredObjects[] = array_replace($token, array('deferred' => true));
    ++$this->deferredObjectsCount;
}
||
122 | |||
/**
 * Peeks at the upcoming token.
 *
 * The token is fetched on the first call and kept stashed, so repeated
 * calls return the same token until the stash is cleared.
 *
 * @return array
 */
public function predictToken()
{
    if (null !== $this->stashedToken) {
        return $this->stashedToken;
    }

    return $this->stashedToken = $this->getNextToken();
}
||
136 | |||
/**
 * Constructs token with specified parameters.
 *
 * The token records the current line number and is not deferred.
 *
 * @param string $type  Token type
 * @param string $value Token value; empty values (null, '', ...) are stored
 *                      as null, while the literal string "0" is preserved
 *
 * @return array
 */
public function takeToken($type, $value = null)
{
    // A bare `$value ?: null` would also discard the string "0", because PHP
    // treats "0" as falsy; "0" is legitimate text, so it is kept explicitly.
    $isEmpty = !$value && '0' !== $value;

    return array(
        'type'     => $type,
        'line'     => $this->lineNumber,
        'value'    => $isEmpty ? null : $value,
        'deferred' => false
    );
}
||
154 | |||
/**
 * Advances to the next input line, or flags end of stream when the
 * input is exhausted.
 */
protected function consumeLine()
{
    ++$this->lineNumber;

    // line numbers are 1-based, the lines array is 0-based
    $nextIndex = $this->lineNumber - 1;

    if ($nextIndex !== $this->linesCount) {
        $this->line = $this->lines[$nextIndex];
        // cached trimmed copy belongs to the previous line
        $this->trimmedLine = null;

        return;
    }

    $this->eos = true;
}
|
171 | |||
172 | /** |
||
173 | * Returns trimmed version of line. |
||
174 | * |
||
175 | * @return string |
||
176 | */ |
||
177 | 52 | protected function getTrimmedLine() |
|
181 | |||
/**
 * Takes the stashed token, leaving the stash empty.
 *
 * @return array|null Stashed token, or null when nothing was stashed
 */
protected function getStashedToken()
{
    $taken = $this->stashedToken;

    // the stash holds at most one token; clear it once handed out
    $this->stashedToken = null;

    return $taken;
}
||
194 | |||
/**
 * Takes the oldest deferred token, if there is one.
 *
 * @return array|null Deferred token, or null when none are queued
 */
protected function getDeferredToken()
{
    if ($this->deferredObjectsCount) {
        --$this->deferredObjectsCount;

        // tokens are handed out in the order they were deferred
        return array_shift($this->deferredObjects);
    }

    return null;
}
||
210 | |||
/**
 * Produces the next token from the input.
 *
 * Deferred tokens come first; otherwise each scanner is tried in a fixed
 * order and the first one that yields a token wins. Plain text is the
 * fallback when no other scanner matches.
 *
 * @return array
 */
protected function getNextToken()
{
    // order matters: earlier entries take precedence over later ones
    $scanners = array(
        'getDeferredToken',
        'scanEOS',
        'scanLanguage',
        'scanComment',
        'scanPyStringOp',
        'scanPyStringContent',
        'scanStep',
        'scanScenario',
        'scanBackground',
        'scanOutline',
        'scanExamples',
        'scanFeature',
        'scanTags',
        'scanTableRow',
        'scanNewline',
    );

    foreach ($scanners as $scanner) {
        $token = $this->$scanner();

        if ($token) {
            return $token;
        }
    }

    return $this->scanText();
}
||
235 | |||
/**
 * Matches the current line against a regex and, on success, turns the
 * first capture group into a token and moves on to the next line.
 *
 * @param string $regex Regular expression (its first capture group becomes the token value)
 * @param string $type  Expected token type
 *
 * @return null|array Token on match, null otherwise
 */
protected function scanInput($regex, $type)
{
    if (preg_match($regex, $this->line, $captures)) {
        // build the token before consuming so it carries the matched line's number
        $token = $this->takeToken($type, $captures[1]);
        $this->consumeLine();

        return $token;
    }

    return null;
}
||
255 | |||
/**
 * Matches the current line against "<indent><keyword>: <title>" and, on
 * success, returns a token carrying the title, keyword and indent width.
 *
 * A successful match also updates the lexer flags that control language,
 * multiline-argument and step scanning, depending on the token type.
 *
 * @param string $keywords Keywords (splitted with |)
 * @param string $type     Expected token type
 *
 * @return null|array Token on match, null otherwise
 */
protected function scanInputForKeywords($keywords, $type)
{
    $pattern = '/^(\s*)(' . $keywords . '):\s*(.*)/u';

    if (!preg_match($pattern, $this->line, $parts)) {
        return null;
    }

    // build the token before consuming so it carries the matched line's number
    $token = $this->takeToken($type, $parts[3]);
    $token['keyword'] = $parts[2];
    $token['indent'] = mb_strlen($parts[1], 'utf8');

    $this->consumeLine();

    $isFeature = 'Feature' === $type;
    $isScenarioLike = 'Scenario' === $type || 'Outline' === $type;

    // turn off language searching
    if ($isFeature) {
        $this->featureStarted = true;
    }

    // turn off PyString and Table searching (Examples turns it back on)
    if ($isFeature || $isScenarioLike) {
        $this->allowMultilineArguments = false;
    } elseif ('Examples' === $type) {
        $this->allowMultilineArguments = true;
    }

    // turn on steps searching
    if ($isScenarioLike || 'Background' === $type) {
        $this->allowSteps = true;
    }

    return $token;
}
||
295 | |||
/**
 * Emits an EOS token once the end of the input has been reached.
 *
 * @return null|array EOS token, or null while input remains
 */
protected function scanEOS()
{
    return $this->eos ? $this->takeToken('EOS') : null;
}
||
309 | |||
/**
 * Returns the regex alternation of keywords for the given type, cached
 * per type.
 *
 * The raw list comes from the keywords holder's "get<Type>Keywords"
 * method. For the 'Step' type each alternative is regex-quoted and given
 * a whitespace suffix: a keyword containing '<' has its last character
 * dropped and may be followed by optional whitespace, any other keyword
 * must be followed by at least one whitespace character.
 *
 * @param string $type Keyword type
 *
 * @return string
 */
protected function getKeywords($type)
{
    if (isset($this->keywordsCache[$type])) {
        return $this->keywordsCache[$type];
    }

    $getter = 'get' . $type . 'Keywords';
    $keywords = $this->keywords->$getter();

    if ('Step' === $type) {
        $alternatives = array();

        foreach (explode('|', $keywords) as $keyword) {
            if (false !== mb_strpos($keyword, '<', 0, 'utf8')) {
                // presumably '<' is a trailing "no space required" marker — TODO confirm
                $alternatives[] = preg_quote(mb_substr($keyword, 0, -1, 'utf8'), '/') . '\s*';
            } else {
                $alternatives[] = preg_quote($keyword, '/') . '\s+';
            }
        }

        $keywords = implode('|', $alternatives);
    }

    return $this->keywordsCache[$type] = $keywords;
}
||
339 | |||
340 | /** |
||
341 | * Scans Feature from input & returns it if found. |
||
342 | * |
||
343 | * @return null|array |
||
344 | */ |
||
345 | 52 | protected function scanFeature() |
|
349 | |||
350 | /** |
||
351 | * Scans Background from input & returns it if found. |
||
352 | * |
||
353 | * @return null|array |
||
354 | */ |
||
355 | 52 | protected function scanBackground() |
|
359 | |||
360 | /** |
||
361 | * Scans Scenario from input & returns it if found. |
||
362 | * |
||
363 | * @return null|array |
||
364 | */ |
||
365 | 52 | protected function scanScenario() |
|
369 | |||
370 | /** |
||
371 | * Scans Scenario Outline from input & returns it if found. |
||
372 | * |
||
373 | * @return null|array |
||
374 | */ |
||
375 | 52 | protected function scanOutline() |
|
379 | |||
380 | /** |
||
381 | * Scans Scenario Outline Examples from input & returns it if found. |
||
382 | * |
||
383 | * @return null|array |
||
384 | */ |
||
385 | 52 | protected function scanExamples() |
|
389 | |||
390 | /** |
||
391 | * Scans Step from input & returns it if found. |
||
392 | * |
||
393 | * @return null|array |
||
394 | */ |
||
395 | 52 | protected function scanStep() |
|
396 | { |
||
397 | 52 | if (!$this->allowSteps) { |
|
398 | 52 | return null; |
|
399 | } |
||
416 | |||
417 | /** |
||
418 | * Scans PyString from input & returns it if found. |
||
419 | * |
||
420 | * @return null|array |
||
421 | */ |
||
422 | 52 | protected function scanPyStringOp() |
|
440 | |||
441 | /** |
||
442 | * Scans PyString content. |
||
443 | * |
||
444 | * @return null|array |
||
445 | */ |
||
446 | 52 | protected function scanPyStringContent() |
|
458 | |||
459 | /** |
||
460 | * Scans Table Row from input & returns it if found. |
||
461 | * |
||
462 | * @return null|array |
||
463 | */ |
||
464 | 51 | protected function scanTableRow() |
|
486 | |||
487 | /** |
||
488 | * Scans Tags from input & returns it if found. |
||
489 | * |
||
490 | * @return null|array |
||
491 | */ |
||
492 | 51 | protected function scanTags() |
|
508 | |||
509 | /** |
||
510 | * Scans Language specifier from input & returns it if found. |
||
511 | * |
||
512 | * @return null|array |
||
513 | */ |
||
514 | 52 | protected function scanLanguage() |
|
530 | |||
531 | /** |
||
532 | * Scans Comment from input & returns it if found. |
||
533 | * |
||
534 | * @return null|array |
||
535 | */ |
||
536 | 52 | protected function scanComment() |
|
552 | |||
553 | /** |
||
554 | * Scans Newline from input & returns it if found. |
||
555 | * |
||
556 | * @return null|array |
||
557 | */ |
||
558 | 51 | protected function scanNewline() |
|
569 | |||
/**
 * Turns the whole current line into a Text token and moves on to the
 * next line.
 *
 * @return null|array
 */
protected function scanText()
{
    $currentLine = $this->line;

    // build the token before consuming so it carries this line's number
    $textToken = $this->takeToken('Text', $currentLine);
    $this->consumeLine();

    return $textToken;
}
||
582 | |||
583 | /** |
||
584 | * Returns step type keyword (Given, When, Then, etc.). |
||
585 | * |
||
586 | * @param string $native Step keyword in provided language |
||
587 | * @return string |
||
588 | */ |
||
589 | 39 | private function getStepKeywordType($native) |
|
609 | } |
||
610 |