Lexer::getNextToken()   F
last analyzed

Complexity

Conditions 16
Paths > 20000

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 16

Importance

Changes 0
Metric Value
dl 0
loc 19
ccs 17
cts 17
cp 1
rs 1.4
c 0
b 0
f 0
cc 16
nc 32768
nop 0
crap 16

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

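Commonly applied refactorings include Extract Method: moving a commented or otherwise self-contained part of the body into its own well-named method.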

1
<?php
2
3
/*
4
 * This file is part of the Behat Gherkin.
5
 * (c) Konstantin Kudryashov <[email protected]>
6
 *
7
 * For the full copyright and license information, please view the LICENSE
8
 * file that was distributed with this source code.
9
 */
10
11
namespace Behat\Gherkin;
12
13
use Behat\Gherkin\Exception\LexerException;
14
use Behat\Gherkin\Keywords\KeywordsInterface;
15
16
/**
 * Gherkin lexer.
 *
 * Splits the input into lines and converts them, one line per call, into
 * token arrays (see takeToken() for the keys every token carries).
 *
 * @author Konstantin Kudryashov <[email protected]>
 */
class Lexer
{
    /** @var string Language name given to the last analyse() call */
    private $language;
    /** @var string[] Input split on newlines */
    private $lines;
    /** @var int Number of entries in $lines */
    private $linesCount;
    /** @var string Line currently being scanned */
    private $line;
    /** @var string|null Cached trim() of the current line; null until requested */
    private $trimmedLine;
    /** @var int 1-based number of the current line */
    private $lineNumber;
    /** @var bool True once every line has been consumed */
    private $eos;
    /** @var KeywordsInterface Keywords holder */
    private $keywords;
    /** @var array Keyword strings per keyword type, as returned by getKeywords() */
    private $keywordsCache = array();
    /** @var array Step type name => list of native keywords for that type */
    private $stepKeywordTypesCache = array();
    /** @var array Queue of tokens registered through deferToken() */
    private $deferredObjects = array();
    /** @var int Number of tokens waiting in $deferredObjects */
    private $deferredObjectsCount = 0;
    /** @var array|null Token fetched by predictToken() and not yet handed out */
    private $stashedToken;
    /** @var bool Toggled by every line containing the """ delimiter */
    private $inPyString = false;
    /** @var int Position of the last """ delimiter within its line */
    private $pyStringSwallow = 0;
    /** @var bool True after a Feature keyword was scanned; disables language scanning */
    private $featureStarted = false;
    /** @var bool Whether PyString delimiters and table rows are currently recognised */
    private $allowMultilineArguments = false;
    /** @var bool Whether step keywords are currently recognised */
    private $allowSteps = false;

    /**
     * Initializes lexer.
     *
     * @param KeywordsInterface $keywords Keywords holder
     */
    public function __construct(KeywordsInterface $keywords)
    {
        $this->keywords = $keywords;
    }

    /**
     * Sets lexer input and resets all scanning state.
     *
     * @param string $input    Input string
     * @param string $language Language name
     *
     * @throws Exception\LexerException If the input is not detected as UTF-8
     */
    public function analyse($input, $language = 'en')
    {
        // try to detect unsupported encoding
        if ('UTF-8' !== mb_detect_encoding($input, 'UTF-8', true)) {
            throw new LexerException('Feature file is not in UTF8 encoding');
        }

        // normalise Windows and old Mac line endings before splitting
        $input = strtr($input, array("\r\n" => "\n", "\r" => "\n"));

        $this->lines = explode("\n", $input);
        $this->linesCount = count($this->lines);
        $this->line = $this->lines[0];
        $this->lineNumber = 1;
        $this->trimmedLine = null;
        $this->eos = false;

        $this->deferredObjects = array();
        $this->deferredObjectsCount = 0;
        $this->stashedToken = null;
        $this->inPyString = false;
        $this->pyStringSwallow = 0;

        $this->featureStarted = false;
        $this->allowMultilineArguments = false;
        $this->allowSteps = false;

        $this->keywords->setLanguage($this->language = $language);
        // cached keywords belong to the previously selected language
        $this->keywordsCache = array();
        $this->stepKeywordTypesCache = array();
    }

    /**
     * Returns current lexer language.
     *
     * @return string
     */
    public function getLanguage()
    {
        return $this->language;
    }

    /**
     * Returns the previously stashed token if there is one, otherwise the next token.
     *
     * @return array
     */
    public function getAdvancedToken()
    {
        return $this->getStashedToken() ?: $this->getNextToken();
    }

    /**
     * Defers token.
     *
     * The token is flagged as deferred and appended to a queue that
     * getNextToken() drains before it scans any further input.
     *
     * @param array $token Token to defer
     */
    public function deferToken(array $token)
    {
        $token['deferred'] = true;
        $this->deferredObjects[] = $token;
        ++$this->deferredObjectsCount;
    }

    /**
     * Returns the upcoming token without handing it out.
     *
     * The token is stashed, so repeated calls return the same token until
     * getAdvancedToken() takes it.
     *
     * @return array
     */
    public function predictToken()
    {
        if (null === $this->stashedToken) {
            $this->stashedToken = $this->getNextToken();
        }

        return $this->stashedToken;
    }

    /**
     * Constructs token with specified parameters.
     *
     * NOTE: any falsy value ('', '0', 0, ...) is stored as null.
     *
     * @param string $type  Token type
     * @param string $value Token value
     *
     * @return array Token with 'type', 'line', 'value' and 'deferred' keys
     */
    public function takeToken($type, $value = null)
    {
        return array(
            'type'     => $type,
            'line'     => $this->lineNumber,
            'value'    => $value ?: null,
            'deferred' => false
        );
    }

    /**
     * Consumes line from input & increments line counter.
     *
     * Raises the end-of-stream flag instead of loading a line once the last
     * line has been consumed.
     */
    protected function consumeLine()
    {
        ++$this->lineNumber;

        if (($this->lineNumber - 1) === $this->linesCount) {
            $this->eos = true;

            return;
        }

        $this->line = $this->lines[$this->lineNumber - 1];
        $this->trimmedLine = null;
    }

    /**
     * Returns trimmed version of line (computed once per line, then cached).
     *
     * @return string
     */
    protected function getTrimmedLine()
    {
        if (null === $this->trimmedLine) {
            $this->trimmedLine = trim($this->line);
        }

        return $this->trimmedLine;
    }

    /**
     * Returns stashed token (clearing the stash) or null if there is none.
     *
     * @return array|null
     */
    protected function getStashedToken()
    {
        $stashedToken = $this->stashedToken;
        $this->stashedToken = null;

        return $stashedToken;
    }

    /**
     * Returns the oldest deferred token or null if none is queued.
     *
     * @return array|null
     */
    protected function getDeferredToken()
    {
        if (!$this->deferredObjectsCount) {
            return null;
        }

        --$this->deferredObjectsCount;

        return array_shift($this->deferredObjects);
    }

    /**
     * Returns next token from input.
     *
     * Scanners are tried in the order listed and the first one that yields a
     * token wins, so the order is significant. scanText() is the fallback and
     * always produces a token.
     *
     * @return array
     */
    protected function getNextToken()
    {
        return $this->getDeferredToken()
            ?: $this->scanEOS()
            ?: $this->scanLanguage()
            ?: $this->scanComment()
            ?: $this->scanPyStringOp()
            ?: $this->scanPyStringContent()
            ?: $this->scanStep()
            ?: $this->scanScenario()
            ?: $this->scanBackground()
            ?: $this->scanOutline()
            ?: $this->scanExamples()
            ?: $this->scanFeature()
            ?: $this->scanTags()
            ?: $this->scanTableRow()
            ?: $this->scanNewline()
            ?: $this->scanText();
    }

    /**
     * Scans for token with specified regex.
     *
     * On a match the first capture group becomes the token value and the
     * line is consumed.
     *
     * @param string $regex Regular expression
     * @param string $type  Expected token type
     *
     * @return null|array
     */
    protected function scanInput($regex, $type)
    {
        if (!preg_match($regex, $this->line, $matches)) {
            return null;
        }

        $token = $this->takeToken($type, $matches[1]);
        $this->consumeLine();

        return $token;
    }

    /**
     * Scans for token with specified keywords.
     *
     * Matches "<indent><keyword>: <rest>"; the token additionally carries the
     * matched 'keyword' and the 'indent' length in characters. Depending on
     * the token type the step / multiline-argument / language switches are
     * updated.
     *
     * @param string $keywords Keywords (splitted with |)
     * @param string $type     Expected token type
     *
     * @return null|array
     */
    protected function scanInputForKeywords($keywords, $type)
    {
        if (!preg_match('/^(\s*)(' . $keywords . '):\s*(.*)/u', $this->line, $matches)) {
            return null;
        }

        $token = $this->takeToken($type, $matches[3]);
        $token['keyword'] = $matches[2];
        $token['indent'] = mb_strlen($matches[1], 'utf8');

        $this->consumeLine();

        // turn off language searching
        if ('Feature' === $type) {
            $this->featureStarted = true;
        }

        // turn off PyString and Table searching
        if ('Feature' === $type || 'Scenario' === $type || 'Outline' === $type) {
            $this->allowMultilineArguments = false;
        } elseif ('Examples' === $type) {
            $this->allowMultilineArguments = true;
        }

        // turn on steps searching
        if ('Scenario' === $type || 'Background' === $type || 'Outline' === $type) {
            $this->allowSteps = true;
        }

        return $token;
    }

    /**
     * Scans EOS from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanEOS()
    {
        if (!$this->eos) {
            return null;
        }

        return $this->takeToken('EOS');
    }

    /**
     * Returns keywords for provided type.
     *
     * The value comes from the keywords holder's get<Type>Keywords() method
     * and is cached per type. For the 'Step' type every keyword is regex
     * quoted; a keyword containing '<' loses its last character and may be
     * followed by optional whitespace, any other keyword requires trailing
     * whitespace.
     *
     * @param string $type Keyword type
     *
     * @return string
     */
    protected function getKeywords($type)
    {
        if (!isset($this->keywordsCache[$type])) {
            $getter = 'get' . $type . 'Keywords';
            $keywords = $this->keywords->$getter();

            if ('Step' === $type) {
                $padded = array();
                foreach (explode('|', $keywords) as $keyword) {
                    $padded[] = false !== mb_strpos($keyword, '<', 0, 'utf8')
                        ? preg_quote(mb_substr($keyword, 0, -1, 'utf8'), '/') . '\s*'
                        : preg_quote($keyword, '/') . '\s+';
                }

                $keywords = implode('|', $padded);
            }

            $this->keywordsCache[$type] = $keywords;
        }

        return $this->keywordsCache[$type];
    }

    /**
     * Scans Feature from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanFeature()
    {
        return $this->scanInputForKeywords($this->getKeywords('Feature'), 'Feature');
    }

    /**
     * Scans Background from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanBackground()
    {
        return $this->scanInputForKeywords($this->getKeywords('Background'), 'Background');
    }

    /**
     * Scans Scenario from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanScenario()
    {
        return $this->scanInputForKeywords($this->getKeywords('Scenario'), 'Scenario');
    }

    /**
     * Scans Scenario Outline from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanOutline()
    {
        return $this->scanInputForKeywords($this->getKeywords('Outline'), 'Outline');
    }

    /**
     * Scans Scenario Outline Examples from input & returns it if found.
     *
     * @return null|array
     */
    protected function scanExamples()
    {
        return $this->scanInputForKeywords($this->getKeywords('Examples'), 'Examples');
    }

    /**
     * Scans Step from input & returns it if found.
     *
     * Only active after a Scenario, Background or Outline keyword. The token
     * value is the trimmed keyword; 'keyword_type' and 'text' are added, and
     * multiline arguments become allowed for the following lines.
     *
     * @return null|array
     */
    protected function scanStep()
    {
        if (!$this->allowSteps) {
            return null;
        }

        $keywords = $this->getKeywords('Step');
        if (!preg_match('/^\s*(' . $keywords . ')([^\s].*)/u', $this->line, $matches)) {
            return null;
        }

        $keyword = trim($matches[1]);
        $token = $this->takeToken('Step', $keyword);
        $token['keyword_type'] = $this->getStepKeywordType($keyword);
        $token['text'] = $matches[2];

        $this->consumeLine();
        $this->allowMultilineArguments = true;

        return $token;
    }

    /**
     * Scans PyString delimiter (""") from input & returns it if found.
     *
     * Toggles the in-PyString state and remembers the delimiter position so
     * that scanPyStringContent() can strip the same amount of indentation.
     *
     * @return null|array
     */
    protected function scanPyStringOp()
    {
        if (!$this->allowMultilineArguments) {
            return null;
        }

        if (false === ($pos = mb_strpos($this->line, '"""', 0, 'utf8'))) {
            return null;
        }

        $this->inPyString = !$this->inPyString;
        $token = $this->takeToken('PyStringOp');
        $this->pyStringSwallow = $pos;

        $this->consumeLine();

        return $token;
    }

    /**
     * Scans PyString content.
     *
     * @return null|array
     */
    protected function scanPyStringContent()
    {
        if (!$this->inPyString) {
            return null;
        }

        $token = $this->scanText();

        // Swallow leading whitespace, at most as many characters as the
        // delimiter was indented. takeToken() stores an empty line as null,
        // so cast to string: passing null as the subject of preg_replace()
        // is deprecated since PHP 8.1.
        $token['value'] = preg_replace(
            '/^\s{0,' . $this->pyStringSwallow . '}/u',
            '',
            (string) $token['value']
        );

        return $token;
    }

    /**
     * Scans Table Row from input & returns it if found.
     *
     * A row is a trimmed line that starts and ends with '|'. Cells are split
     * on unescaped pipes, '\|' is unescaped to '|' and each cell is trimmed;
     * the result is stored under 'columns'.
     *
     * @return null|array
     */
    protected function scanTableRow()
    {
        if (!$this->allowMultilineArguments) {
            return null;
        }

        $line = $this->getTrimmedLine();
        if (!isset($line[0]) || '|' !== $line[0] || '|' !== substr($line, -1)) {
            return null;
        }

        $token = $this->takeToken('TableRow');
        // drop the outer pipes
        $line = mb_substr($line, 1, mb_strlen($line, 'utf8') - 2, 'utf8');
        $columns = array_map(function ($column) {
            return trim(str_replace('\\|', '|', $column));
        }, preg_split('/(?<!\\\)\|/u', $line));
        $token['columns'] = $columns;

        $this->consumeLine();

        return $token;
    }

    /**
     * Scans Tags from input & returns it if found.
     *
     * A tag line is a trimmed line starting with '@'; the trimmed names
     * (without '@') are stored under 'tags'.
     *
     * @return null|array
     */
    protected function scanTags()
    {
        $line = $this->getTrimmedLine();
        if (!isset($line[0]) || '@' !== $line[0]) {
            return null;
        }

        $token = $this->takeToken('Tag');
        $tags = explode('@', mb_substr($line, 1, mb_strlen($line, 'utf8') - 1, 'utf8'));
        $tags = array_map('trim', $tags);
        $token['tags'] = $tags;

        $this->consumeLine();

        return $token;
    }

    /**
     * Scans Language specifier from input & returns it if found.
     *
     * Only attempted before the Feature keyword, outside of a PyString, and
     * on lines whose first non-blank character is '#'.
     *
     * @return null|array
     */
    protected function scanLanguage()
    {
        if ($this->featureStarted) {
            return null;
        }

        if ($this->inPyString) {
            return null;
        }

        if (0 !== mb_strpos(ltrim($this->line), '#', 0, 'utf8')) {
            return null;
        }

        return $this->scanInput('/^\s*\#\s*language:\s*([\w_\-]+)\s*$/', 'Language');
    }

    /**
     * Scans Comment from input & returns it if found.
     *
     * A comment is a line whose trimmed form starts with '#'; comments are
     * not recognised inside a PyString.
     *
     * @return null|array
     */
    protected function scanComment()
    {
        if ($this->inPyString) {
            return null;
        }

        $line = $this->getTrimmedLine();
        if (0 !== mb_strpos($line, '#', 0, 'utf8')) {
            return null;
        }

        $token = $this->takeToken('Comment', $line);
        $this->consumeLine();

        return $token;
    }

    /**
     * Scans Newline from input & returns it if found.
     *
     * Matches lines that are empty after trimming; the token value is the
     * untrimmed line length (null for a zero-length line, see takeToken()).
     *
     * @return null|array
     */
    protected function scanNewline()
    {
        if ('' !== $this->getTrimmedLine()) {
            return null;
        }

        $token = $this->takeToken('Newline', mb_strlen($this->line, 'utf8'));
        $this->consumeLine();

        return $token;
    }

    /**
     * Scans text from input; always returns a token for the current line.
     *
     * @return array
     */
    protected function scanText()
    {
        $token = $this->takeToken('Text', $this->line);
        $this->consumeLine();

        return $token;
    }

    /**
     * Returns step type keyword (Given, When, Then, etc.).
     *
     * @param string $native Step keyword in provided language
     *
     * @return string Matching step type, or 'Given' when nothing matches
     */
    private function getStepKeywordType($native)
    {
        // Consider "*" as a AND keyword so that it is normalized to the previous step type
        if ('*' === $native) {
            return 'And';
        }

        if (empty($this->stepKeywordTypesCache)) {
            $this->stepKeywordTypesCache = array(
                'Given' => explode('|', $this->keywords->getGivenKeywords()),
                'When' => explode('|', $this->keywords->getWhenKeywords()),
                'Then' => explode('|', $this->keywords->getThenKeywords()),
                'And' => explode('|', $this->keywords->getAndKeywords()),
                'But' => explode('|', $this->keywords->getButKeywords())
            );
        }

        foreach ($this->stepKeywordTypesCache as $type => $keywords) {
            // strict comparison: keywords are strings and must match exactly;
            // the '<' variant covers keywords declared with a trailing '<'
            if (in_array($native, $keywords, true) || in_array($native . '<', $keywords, true)) {
                return $type;
            }
        }

        return 'Given';
    }
}
615