Completed
Pull Request — master (#112)
by Christophe
02:45
created

Lexer   D

Complexity

Total Complexity 89

Size/Duplication

Total Lines 589
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Test Coverage

Coverage 95.65%

Importance

Changes 8
Bugs 0 Features 1
Metric Value
wmc 89
c 8
b 0
f 1
lcom 1
cbo 2
dl 0
loc 589
ccs 220
cts 230
cp 0.9565
rs 4.8717

31 Methods

Rating   Name   Duplication   Size   Complexity  
A getLanguage() 0 4 1
A getAdvancedToken() 0 4 2
A getTrimmedLine() 0 4 2
A scanFeature() 0 4 1
A scanBackground() 0 4 1
A scanScenario() 0 4 1
A scanOutline() 0 4 1
A scanExamples() 0 4 1
B analyse() 0 30 2
A deferToken() 0 6 1
A predictToken() 0 8 2
A takeToken() 0 9 2
A consumeLine() 0 13 2
A getStashedToken() 0 7 1
A getDeferredToken() 0 10 2
C getNextToken() 0 19 16
A scanInput() 0 11 2
D scanInputForKeywords() 0 31 10
A scanStep() 0 21 3
A scanPyStringOp() 0 18 3
A scanPyStringContent() 0 12 2
B scanTableRow() 0 22 5
A scanTags() 0 16 3
A scanLanguage() 0 16 4
A scanComment() 0 16 3
A scanNewline() 0 11 2
B getStepKeywordType() 0 20 5
A __construct() 0 4 1
A scanEOS() 0 8 2
A scanText() 0 7 1
B getKeywords() 0 22 5

How to fix   Complexity   

Complex Class

Complex classes like Lexer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Lexer, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/*
4
 * This file is part of the Behat Gherkin.
5
 * (c) Konstantin Kudryashov <[email protected]>
6
 *
7
 * For the full copyright and license information, please view the LICENSE
8
 * file that was distributed with this source code.
9
 */
10
11
namespace Behat\Gherkin;
12
13
use Behat\Gherkin\Exception\LexerException;
14
use Behat\Gherkin\Keywords\KeywordsInterface;
15
16
/**
17
 * Gherkin lexer.
18
 *
19
 * @author Konstantin Kudryashov <[email protected]>
20
 */
21
class Lexer
{
    /** @var string Language name given to the most recent analyse() call */
    private $language;
    /** @var string[] Input split into lines (line endings normalised to "\n") */
    private $lines;
    /** @var int Number of entries in $lines */
    private $linesCount;
    /** @var string Raw (untrimmed) text of the line currently being scanned */
    private $line;
    /** @var string|null Memoised trim() of $line; null until first requested */
    private $trimmedLine;
    /** @var int 1-based number of the line currently being scanned */
    private $lineNumber;
    /** @var bool True once the last input line has been consumed */
    private $eos;
    /** @var KeywordsInterface Keywords holder supplied to the constructor */
    private $keywords;
    /** @var array Keyword strings already fetched from the holder, indexed by keyword type */
    private $keywordsCache = array();
    /** @var array Map of step type ('Given', 'When', ...) => list of native keywords */
    private $stepKeywordTypesCache = array();
    /** @var array FIFO queue of tokens registered through deferToken() */
    private $deferredObjects = array();
    /** @var int Number of tokens currently held in $deferredObjects */
    private $deferredObjectsCount = 0;
    /** @var array|null Token fetched by predictToken() and not yet handed out */
    private $stashedToken;
    /** @var bool True while scanning between an opening and a closing """ line */
    private $inPyString = false;
    /** @var int Character offset of the last """ marker; upper bound of indentation stripped from PyString lines */
    private $pyStringSwallow = 0;
    /** @var bool True once a Feature keyword line was scanned; disables language scanning */
    private $featureStarted = false;
    /** @var bool Whether PyString markers and table rows are currently recognised */
    private $allowMultilineArguments = false;
    /** @var bool Whether step lines are currently recognised */
    private $allowSteps = false;

    /**
     * Initializes lexer.
     *
     * @param KeywordsInterface $keywords Keywords holder
     */
    public function __construct(KeywordsInterface $keywords)
    {
        $this->keywords = $keywords;
    }

    /**
     * Sets lexer input and resets all scanning state.
     *
     * Line endings ("\r\n" and "\r") are normalised to "\n" before the input
     * is split into lines. The keywords holder is switched to $language and
     * both keyword caches are emptied.
     *
     * @param string $input    Input string
     * @param string $language Language name
     *
     * @throws Exception\LexerException When the input is not detected as UTF-8
     */
    public function analyse($input, $language = 'en')
    {
        // try to detect unsupported encoding
        if ('UTF-8' !== mb_detect_encoding($input, 'UTF-8', true)) {
            throw new LexerException('Feature file is not in UTF8 encoding');
        }

        $input = strtr($input, array("\r\n" => "\n", "\r" => "\n"));

        // cursor state
        $this->lines = explode("\n", $input);
        $this->linesCount = count($this->lines);
        $this->line = $this->lines[0];
        $this->lineNumber = 1;
        $this->trimmedLine = null;
        $this->eos = false;

        // token buffers and PyString state
        $this->deferredObjects = array();
        $this->deferredObjectsCount = 0;
        $this->stashedToken = null;
        $this->inPyString = false;
        $this->pyStringSwallow = 0;

        // scanner switches
        $this->featureStarted = false;
        $this->allowMultilineArguments = false;
        $this->allowSteps = false;

        // keywords depend on the language, so cached values are no longer valid
        $this->keywords->setLanguage($this->language = $language);
        $this->keywordsCache = array();
        $this->stepKeywordTypesCache = array();
    }

    /**
     * Returns current lexer language.
     *
     * @return string
     */
    public function getLanguage()
    {
        return $this->language;
    }

    /**
     * Returns the previously stashed token (clearing the stash) or, when
     * nothing is stashed, the next token from the input.
     *
     * @return array
     */
    public function getAdvancedToken()
    {
        return $this->getStashedToken() ?: $this->getNextToken();
    }

    /**
     * Defers token.
     *
     * The token is flagged with 'deferred' => true and queued; queued tokens
     * are handed out by getNextToken() before any further input is scanned.
     *
     * @param array $token Token to defer
     */
    public function deferToken(array $token)
    {
        $token['deferred'] = true;
        $this->deferredObjects[] = $token;
        ++$this->deferredObjectsCount;
    }

    /**
     * Peeks at the upcoming token without handing it out.
     *
     * The token is fetched once and stashed; repeated calls return the same
     * token until getAdvancedToken() takes it.
     *
     * @return array
     */
    public function predictToken()
    {
        if (null === $this->stashedToken) {
            $this->stashedToken = $this->getNextToken();
        }

        return $this->stashedToken;
    }

    /**
     * Constructs token with specified parameters.
     *
     * The token is stamped with the current line number. Note that any falsy
     * value ('', '0', 0) is stored as null.
     *
     * @param string $type  Token type
     * @param string $value Token value
     *
     * @return array
     */
    public function takeToken($type, $value = null)
    {
        return array(
            'type'     => $type,
            'line'     => $this->lineNumber,
            'value'    => $value ?: null,
            'deferred' => false
        );
    }

    /**
     * Consumes line from input & increments line counter.
     *
     * When the consumed line was the last one, the end-of-stream flag is set
     * and the current line is left untouched.
     */
    protected function consumeLine()
    {
        ++$this->lineNumber;

        if (($this->lineNumber - 1) === $this->linesCount) {
            $this->eos = true;

            return;
        }

        $this->line = $this->lines[$this->lineNumber - 1];
        // invalidate the memoised trimmed copy of the previous line
        $this->trimmedLine = null;
    }

    /**
     * Returns trimmed version of line (computed once per line).
     *
     * @return string
     */
    protected function getTrimmedLine()
    {
        return null !== $this->trimmedLine ? $this->trimmedLine : $this->trimmedLine = trim($this->line);
    }

    /**
     * Returns stashed token (and clears the stash) or null if there is none.
     *
     * @return array|null
     */
    protected function getStashedToken()
    {
        $stashedToken = $this->stashedToken;
        $this->stashedToken = null;

        return $stashedToken;
    }

    /**
     * Returns the oldest deferred token or null if there is none.
     *
     * @return array|null
     */
    protected function getDeferredToken()
    {
        if (!$this->deferredObjectsCount) {
            return null;
        }

        --$this->deferredObjectsCount;

        return array_shift($this->deferredObjects);
    }

    /**
     * Returns next token from input.
     *
     * Deferred tokens come first; after that each scanner is tried in the
     * order listed and the first one that yields a token wins. scanText()
     * is the catch-all at the end of the chain.
     *
     * @return array
     */
    protected function getNextToken()
    {
        return $this->getDeferredToken()
            ?: $this->scanEOS()
            ?: $this->scanLanguage()
            ?: $this->scanComment()
            ?: $this->scanPyStringOp()
            ?: $this->scanPyStringContent()
            ?: $this->scanStep()
            ?: $this->scanScenario()
            ?: $this->scanBackground()
            ?: $this->scanOutline()
            ?: $this->scanExamples()
            ?: $this->scanFeature()
            ?: $this->scanTags()
            ?: $this->scanTableRow()
            ?: $this->scanNewline()
            ?: $this->scanText();
    }

    /**
     * Scans for token with specified regex.
     *
     * On a match the first capture group becomes the token value and the
     * line is consumed.
     *
     * @param string $regex Regular expression
     * @param string $type  Expected token type
     *
     * @return null|array
     */
    protected function scanInput($regex, $type)
    {
        if (!preg_match($regex, $this->line, $matches)) {
            return null;
        }

        $token = $this->takeToken($type, $matches[1]);
        $this->consumeLine();

        return $token;
    }

    /**
     * Scans for token with specified keywords.
     *
     * Matches lines of the form "<indent><keyword>: <title>". The resulting
     * token carries the title as value, the matched keyword in 'keyword' and
     * the indentation length (in characters) in 'indent'. Depending on $type
     * the scanner switches are updated as well.
     *
     * @param string $keywords Keywords (splitted with |)
     * @param string $type     Expected token type
     *
     * @return null|array
     */
    protected function scanInputForKeywords($keywords, $type)
    {
        if (!preg_match('/^(\s*)(' . $keywords . '):\s*(.*)/u', $this->line, $matches)) {
            return null;
        }

        $token = $this->takeToken($type, $matches[3]);
        $token['keyword'] = $matches[2];
        $token['indent'] = mb_strlen($matches[1], 'utf8');

        $this->consumeLine();

        // turn off language searching
        if ('Feature' === $type) {
            $this->featureStarted = true;
        }

        // turn off PyString and Table searching
        if ('Feature' === $type || 'Scenario' === $type || 'Outline' === $type) {
            $this->allowMultilineArguments = false;
        } elseif ('Examples' === $type) {
            // an Examples section is followed by table rows
            $this->allowMultilineArguments = true;
        }

        // turn on steps searching
        if ('Scenario' === $type || 'Background' === $type || 'Outline' === $type) {
            $this->allowSteps = true;
        }

        return $token;
    }

    /**
     * Scans EOS from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanEOS()
    {
        if (!$this->eos) {
            return null;
        }

        return $this->takeToken('EOS');
    }

    /**
     * Returns keywords for provided type.
     *
     * The value comes from the keywords holder's get<Type>Keywords() method
     * and is cached per type. For the 'Step' type every keyword is quoted
     * for use in a regex and padded: a keyword containing '<' loses its last
     * character and may be followed by optional whitespace, any other
     * keyword must be followed by at least one whitespace character.
     *
     * @param string $type Keyword type
     *
     * @return string
     */
    protected function getKeywords($type)
    {
        if (!isset($this->keywordsCache[$type])) {
            $getter = 'get' . $type . 'Keywords';
            $keywords = $this->keywords->$getter();

            if ('Step' === $type) {
                $padded = array();
                foreach (explode('|', $keywords) as $keyword) {
                    $padded[] = false !== mb_strpos($keyword, '<', 0, 'utf8')
                        ? preg_quote(mb_substr($keyword, 0, -1, 'utf8'), '/') . '\s*'
                        : preg_quote($keyword, '/') . '\s+';
                }

                $keywords = implode('|', $padded);
            }

            $this->keywordsCache[$type] = $keywords;
        }

        return $this->keywordsCache[$type];
    }

    /**
     * Scans Feature from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanFeature()
    {
        return $this->scanInputForKeywords($this->getKeywords('Feature'), 'Feature');
    }

    /**
     * Scans Background from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanBackground()
    {
        return $this->scanInputForKeywords($this->getKeywords('Background'), 'Background');
    }

    /**
     * Scans Scenario from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanScenario()
    {
        return $this->scanInputForKeywords($this->getKeywords('Scenario'), 'Scenario');
    }

    /**
     * Scans Scenario Outline from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanOutline()
    {
        return $this->scanInputForKeywords($this->getKeywords('Outline'), 'Outline');
    }

    /**
     * Scans Scenario Outline Examples from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanExamples()
    {
        return $this->scanInputForKeywords($this->getKeywords('Examples'), 'Examples');
    }

    /**
     * Scans Step from input & returns it if found.
     *
     * The token value is the trimmed keyword; 'keyword_type' holds the step
     * type and 'text' the remainder of the line. A recognised step turns
     * PyString and table scanning on.
     *
     * @return null|array
     */
    protected function scanStep()
    {
        if (!$this->allowSteps) {
            return null;
        }

        $keywords = $this->getKeywords('Step');
        // The text must start with a non-whitespace character; ".*" (rather
        // than ".+") lets a single-character step text such as "Given x" match.
        if (!preg_match('/^\s*(' . $keywords . ')([^\s].*)/u', $this->line, $matches)) {
            return null;
        }

        $keyword = trim($matches[1]);
        $token = $this->takeToken('Step', $keyword);
        $token['keyword_type'] = $this->getStepKeywordType($keyword);
        $token['text'] = $matches[2];

        $this->consumeLine();
        $this->allowMultilineArguments = true;

        return $token;
    }

    /**
     * Scans PyString delimiter (""") from input & returns it if found.
     *
     * Each delimiter toggles the in-PyString state and records its character
     * offset, which bounds the indentation later stripped from content lines.
     *
     * @return null|array
     */
    protected function scanPyStringOp()
    {
        if (!$this->allowMultilineArguments) {
            return null;
        }

        if (false === ($pos = mb_strpos($this->line, '"""', 0, 'utf8'))) {
            return null;
        }

        $this->inPyString = !$this->inPyString;
        $token = $this->takeToken('PyStringOp');
        $this->pyStringSwallow = $pos;

        $this->consumeLine();

        return $token;
    }

    /**
     * Scans PyString content.
     *
     * Inside a PyString every line becomes a Text token whose value is the
     * raw line with at most $pyStringSwallow leading whitespace characters
     * removed. The value is always a string here (possibly empty).
     *
     * @return null|array
     */
    protected function scanPyStringContent()
    {
        if (!$this->inPyString) {
            return null;
        }

        // Keep the raw line: takeToken() turns '' and '0' into null, which
        // would drop a literal "0" line and hand null to preg_replace()
        // (deprecated since PHP 8.1).
        $line = $this->line;
        $token = $this->scanText();

        // swallow leading indentation, up to the column of the """ delimiter
        $token['value'] = preg_replace('/^\s{0,' . $this->pyStringSwallow . '}/u', '', $line);

        return $token;
    }

    /**
     * Scans Table Row from input & returns it if found.
     *
     * A row is a trimmed line that starts and ends with '|'. Cells are split
     * on unescaped pipes, "\|" is unescaped to "|" and every cell is trimmed;
     * the result is stored in the token's 'columns' entry.
     *
     * @return null|array
     */
    protected function scanTableRow()
    {
        if (!$this->allowMultilineArguments) {
            return null;
        }

        $line = $this->getTrimmedLine();
        if (!isset($line[0]) || '|' !== $line[0] || '|' !== substr($line, -1)) {
            return null;
        }

        $token = $this->takeToken('TableRow');
        // drop the outer pipes before splitting
        $line = mb_substr($line, 1, mb_strlen($line, 'utf8') - 2, 'utf8');
        $columns = array_map(function ($column) {
            return trim(str_replace('\\|', '|', $column));
        }, preg_split('/(?<!\\\)\|/u', $line));
        $token['columns'] = $columns;

        $this->consumeLine();

        return $token;
    }

    /**
     * Scans Tags from input & returns it if found.
     *
     * A tag line is a trimmed line starting with '@'. The text after the
     * first '@' is split on '@' and each piece is trimmed; the list is
     * stored in the token's 'tags' entry.
     *
     * @return null|array
     */
    protected function scanTags()
    {
        $line = $this->getTrimmedLine();
        if (!isset($line[0]) || '@' !== $line[0]) {
            return null;
        }

        $token = $this->takeToken('Tag');
        $tags = explode('@', mb_substr($line, 1, mb_strlen($line, 'utf8') - 1, 'utf8'));
        $tags = array_map('trim', $tags);
        $token['tags'] = $tags;

        $this->consumeLine();

        return $token;
    }

    /**
     * Scans Language specifier from input & returns it if found.
     *
     * Only attempted before the Feature line and outside of PyStrings, and
     * only for lines whose first non-whitespace character is '#'.
     *
     * @return null|array
     */
    protected function scanLanguage()
    {
        if ($this->featureStarted) {
            return null;
        }

        if ($this->inPyString) {
            return null;
        }

        if (0 !== mb_strpos(ltrim($this->line), '#', 0, 'utf8')) {
            return null;
        }

        return $this->scanInput('/^\s*\#\s*language:\s*([\w_\-]+)\s*$/', 'Language');
    }

    /**
     * Scans Comment from input & returns it if found.
     *
     * A comment is a line whose trimmed form starts with '#'; the trimmed
     * line is the token value. Comments are not recognised inside PyStrings.
     *
     * @return null|array
     */
    protected function scanComment()
    {
        if ($this->inPyString) {
            return null;
        }

        $line = $this->getTrimmedLine();
        if (0 !== mb_strpos($line, '#', 0, 'utf8')) {
            return null;
        }

        $token = $this->takeToken('Comment', $line);
        $this->consumeLine();

        return $token;
    }

    /**
     * Scans Newline from input & returns it if found.
     *
     * Matches lines that are empty after trimming. The token value is the
     * character length of the raw line (null for a zero-length line, see
     * takeToken()).
     *
     * @return null|array
     */
    protected function scanNewline()
    {
        if ('' !== $this->getTrimmedLine()) {
            return null;
        }

        $token = $this->takeToken('Newline', mb_strlen($this->line, 'utf8'));
        $this->consumeLine();

        return $token;
    }

    /**
     * Scans text from input & returns it.
     *
     * Always succeeds: the raw current line becomes the value of a Text
     * token and the line is consumed.
     *
     * @return array
     */
    protected function scanText()
    {
        $token = $this->takeToken('Text', $this->line);
        $this->consumeLine();

        return $token;
    }

    /**
     * Returns step type keyword (Given, When, Then, etc.).
     *
     * The native keyword is looked up (as-is and with a trailing '<') in the
     * keyword lists of each step type; the first type that contains it is
     * returned. Falls back to 'Given' when no list contains the keyword.
     *
     * @param string $native Step keyword in provided language
     *
     * @return string
     */
    private function getStepKeywordType($native)
    {
        if (empty($this->stepKeywordTypesCache)) {
            $this->stepKeywordTypesCache = array(
                'Given' => explode('|', $this->keywords->getGivenKeywords()),
                'When' => explode('|', $this->keywords->getWhenKeywords()),
                'Then' => explode('|', $this->keywords->getThenKeywords()),
                'And' => explode('|', $this->keywords->getAndKeywords()),
                'But' => explode('|', $this->keywords->getButKeywords())
            );
        }

        foreach ($this->stepKeywordTypesCache as $type => $keywords) {
            // strict comparison: keywords are strings and must match exactly
            if (in_array($native, $keywords, true) || in_array($native . '<', $keywords, true)) {
                return $type;
            }
        }

        return 'Given';
    }
}
610