Completed
Pull Request — master (#162)
by
unknown
02:07
created

Lexer::scanComment()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 3.3332

Importance

Changes 0
Metric Value
dl 0
loc 16
ccs 6
cts 9
cp 0.6667
rs 9.7333
c 0
b 0
f 0
cc 3
nc 3
nop 0
crap 3.3332
1
<?php
2
3
/*
4
 * This file is part of the Behat Gherkin.
5
 * (c) Konstantin Kudryashov <ever.zet@gmail.com>
6
 *
7
 * For the full copyright and license information, please view the LICENSE
8
 * file that was distributed with this source code.
9
 */
10
11
namespace Behat\Gherkin;
12
13
use Behat\Gherkin\Exception\LexerException;
14
use Behat\Gherkin\Keywords\KeywordsInterface;
15
16
/**
17
 * Gherkin lexer.
18
 *
19
 * @author Konstantin Kudryashov <ever.zet@gmail.com>
20
 */
21
class Lexer
22
{
23
    private $language;
24
    private $lines;
25
    private $linesCount;
26
    private $line;
27
    private $trimmedLine;
28
    private $lineNumber;
29
    private $eos;
30
    private $keywords;
31
    private $keywordsCache = array();
32
    private $stepKeywordTypesCache = array();
33
    private $deferredObjects = array();
34
    private $deferredObjectsCount = 0;
35
    private $stashedToken;
36
    private $inPyString = false;
37
    private $pyStringSwallow = 0;
38
    private $featureStarted = false;
39
    private $allowMultilineArguments = false;
40
    private $allowSteps = false;
41
42
    /**
     * Creates the lexer and stores the keywords holder it will query.
     *
     * @param KeywordsInterface $keywords Keywords holder
     */
    public function __construct(KeywordsInterface $keywords)
    {
        $this->keywords = $keywords;
    }
51
52
    /**
     * Loads a new input into the lexer and resets all scanning state.
     *
     * Line endings are normalised to "\n" before the input is split into lines.
     *
     * @param string $input    Input string
     * @param string $language Language name
     *
     * @throws Exception\LexerException When the input is not detected as UTF-8
     */
    public function analyse($input, $language = 'en')
    {
        // refuse input that is not detected as UTF-8
        $detectedEncoding = mb_detect_encoding($input, 'UTF-8', true);
        if ('UTF-8' !== $detectedEncoding) {
            throw new LexerException('Feature file is not in UTF8 encoding');
        }

        // convert Windows and old Mac line endings to "\n"
        $normalized = strtr($input, array("\r\n" => "\n", "\r" => "\n"));

        // line buffer and cursor
        $this->lines = explode("\n", $normalized);
        $this->linesCount = count($this->lines);
        $this->line = $this->lines[0];
        $this->lineNumber = 1;
        $this->trimmedLine = null;
        $this->eos = false;

        // pending tokens and PyString tracking
        $this->deferredObjects = array();
        $this->deferredObjectsCount = 0;
        $this->stashedToken = null;
        $this->inPyString = false;
        $this->pyStringSwallow = 0;

        // flags that decide which scanners are allowed to match
        $this->featureStarted = false;
        $this->allowMultilineArguments = false;
        $this->allowSteps = false;

        // switch the keywords holder to the requested language and drop caches
        $this->language = $language;
        $this->keywords->setLanguage($language);
        $this->keywordsCache = array();
        $this->stepKeywordTypesCache = array();
    }
90
91
    /**
     * Returns the language name stored by the last call to analyse().
     *
     * @return string
     */
    public function getLanguage()
    {
        return $this->language;
    }
100
101
    /**
     * Returns the stashed token if there is one, otherwise scans the next token.
     *
     * Fetching the stashed token also clears it.
     *
     * @return array
     */
    public function getAdvancedToken()
    {
        $stashed = $this->getStashedToken();
        if ($stashed) {
            return $stashed;
        }

        return $this->getNextToken();
    }
110
111
    /**
     * Queues a token so that getNextToken() returns it before scanning input.
     *
     * The queued copy is flagged with 'deferred' => true.
     *
     * @param array $token Token to defer
     */
    public function deferToken(array $token)
    {
        $deferred = $token;
        $deferred['deferred'] = true;

        $this->deferredObjects[] = $deferred;
        ++$this->deferredObjectsCount;
    }
122
123
    /**
     * Peeks at the upcoming token without handing it out.
     *
     * The token is scanned once and stashed; repeated calls return the same
     * stashed token until it is taken with getAdvancedToken().
     *
     * @return array
     */
    public function predictToken()
    {
        if (null !== $this->stashedToken) {
            return $this->stashedToken;
        }

        return $this->stashedToken = $this->getNextToken();
    }
136
137
    /**
     * Builds a token array for the current line number.
     *
     * Falsy values (for example '', '0' or 0) are stored as null.
     *
     * @param string $type  Token type
     * @param string $value Token value
     *
     * @return array
     */
    public function takeToken($type, $value = null)
    {
        $token = array();
        $token['type'] = $type;
        $token['line'] = $this->lineNumber;
        $token['value'] = $value ? $value : null;
        $token['deferred'] = false;

        return $token;
    }
154
155
    /**
     * Moves to the next input line and increments the line counter.
     *
     * Sets the end-of-stream flag instead when the last line has been consumed.
     */
    protected function consumeLine()
    {
        ++$this->lineNumber;

        // zero-based index of the line that becomes current
        $index = $this->lineNumber - 1;

        if ($index === $this->linesCount) {
            $this->eos = true;

            return;
        }

        $this->line = $this->lines[$index];
        // the cached trimmed copy belongs to the previous line
        $this->trimmedLine = null;
    }
171
172
    /**
     * Returns the current line with surrounding whitespace removed.
     *
     * The trimmed copy is computed once per line and cached.
     *
     * @return string
     */
    protected function getTrimmedLine()
    {
        if (null === $this->trimmedLine) {
            $this->trimmedLine = trim($this->line);
        }

        return $this->trimmedLine;
    }
181
182
    /**
     * Hands out the stashed token and clears the stash.
     *
     * @return array|null The stashed token, or null when nothing is stashed
     */
    protected function getStashedToken()
    {
        $token = $this->stashedToken;
        $this->stashedToken = null;

        return $token;
    }
194
195
    /**
     * Removes and returns the oldest deferred token.
     *
     * @return array|null The deferred token, or null when the queue is empty
     */
    protected function getDeferredToken()
    {
        if ($this->deferredObjectsCount) {
            --$this->deferredObjectsCount;

            return array_shift($this->deferredObjects);
        }

        return null;
    }
210
211
    /**
     * Returns the next token from the deferred queue or from the input.
     *
     * The deferred queue is checked first, then each scanner is tried in a
     * fixed order. The first truthy result wins; scanText() is the last
     * scanner and its result is returned when nothing else matched.
     *
     * @return array
     */
    protected function getNextToken()
    {
        $token = $this->getDeferredToken();
        if ($token) {
            return $token;
        }

        // order matters: earlier scanners take precedence over later ones
        $scanners = array(
            'scanEOS',
            'scanLanguage',
            'scanComment',
            'scanPyStringOp',
            'scanPyStringContent',
            'scanStep',
            'scanScenario',
            'scanBackground',
            'scanOutline',
            'scanExamples',
            'scanFeature',
            'scanTags',
            'scanTableRow',
            'scanNewline',
            'scanText',
        );

        foreach ($scanners as $scanner) {
            $token = $this->$scanner();
            if ($token) {
                return $token;
            }
        }

        // result of the final scanner (scanText)
        return $token;
    }
235
236
    /**
     * Matches the current line against a regex and turns a match into a token.
     *
     * The first capture group becomes the token value, and the line is
     * consumed on a match.
     *
     * @param string $regex Regular expression
     * @param string $type  Expected token type
     *
     * @return null|array
     */
    protected function scanInput($regex, $type)
    {
        if (preg_match($regex, $this->line, $captures)) {
            // build the token before consuming so it carries this line's number
            $token = $this->takeToken($type, $captures[1]);
            $this->consumeLine();

            return $token;
        }

        return null;
    }
255
256
    /**
     * Scans the current line for "<keyword>: <title>" and returns a token for it.
     *
     * The token carries the matched keyword and the indentation length, and
     * the lexer flags are updated according to the token type.
     *
     * @param string $keywords Keywords joined with |, inserted into the regex as-is
     * @param string $type     Expected token type
     *
     * @return null|array
     */
    protected function scanInputForKeywords($keywords, $type)
    {
        $pattern = '/^(\s*)(' . $keywords . '):\s*(.*)/u';
        if (!preg_match($pattern, $this->line, $parts)) {
            return null;
        }

        // build the token before consuming so it carries this line's number
        $token = $this->takeToken($type, $parts[3]);
        $token['keyword'] = $parts[2];
        $token['indent'] = mb_strlen($parts[1], 'utf8');

        $this->consumeLine();

        $isFeature = 'Feature' === $type;
        $isScenarioLike = 'Scenario' === $type || 'Outline' === $type;

        // turn off language searching
        if ($isFeature) {
            $this->featureStarted = true;
        }

        // PyString and Table searching: off after a section header, on after Examples
        if ($isFeature || $isScenarioLike) {
            $this->allowMultilineArguments = false;
        } elseif ('Examples' === $type) {
            $this->allowMultilineArguments = true;
        }

        // turn on steps searching
        if ($isScenarioLike || 'Background' === $type) {
            $this->allowSteps = true;
        }

        return $token;
    }
295
296
    /**
     * Returns an EOS token once the end-of-stream flag has been set.
     *
     * @return null|array
     */
    protected function scanEOS()
    {
        return $this->eos ? $this->takeToken('EOS') : null;
    }
309
310
    /**
     * Returns the regex alternation of keywords for the given type, cached per type.
     *
     * The keywords are read from the "get<Type>Keywords" method of the keywords
     * holder. For the 'Step' type every keyword is regex-quoted: a keyword
     * containing '<' loses its last character and allows optional trailing
     * whitespace, any other keyword requires at least one whitespace character.
     *
     * @param string $type Keyword type
     *
     * @return string
     */
    protected function getKeywords($type)
    {
        if (isset($this->keywordsCache[$type])) {
            return $this->keywordsCache[$type];
        }

        $getter = 'get' . $type . 'Keywords';
        $keywords = $this->keywords->$getter();

        if ('Step' === $type) {
            $toPattern = function ($keyword) {
                if (false !== mb_strpos($keyword, '<', 0, 'utf8')) {
                    return preg_quote(mb_substr($keyword, 0, -1, 'utf8'), '/') . '\s*';
                }

                return preg_quote($keyword, '/') . '\s+';
            };

            $keywords = implode('|', array_map($toPattern, explode('|', $keywords)));
        }

        return $this->keywordsCache[$type] = $keywords;
    }
339
340
    /**
     * Scans the current line for a Feature header.
     *
     * @return null|array
     */
    protected function scanFeature()
    {
        $keywords = $this->getKeywords('Feature');

        return $this->scanInputForKeywords($keywords, 'Feature');
    }
349
350
    /**
     * Scans the current line for a Background header.
     *
     * @return null|array
     */
    protected function scanBackground()
    {
        $keywords = $this->getKeywords('Background');

        return $this->scanInputForKeywords($keywords, 'Background');
    }
359
360
    /**
     * Scans the current line for a Scenario header.
     *
     * @return null|array
     */
    protected function scanScenario()
    {
        $keywords = $this->getKeywords('Scenario');

        return $this->scanInputForKeywords($keywords, 'Scenario');
    }
369
370
    /**
     * Scans the current line for a Scenario Outline header.
     *
     * @return null|array
     */
    protected function scanOutline()
    {
        $keywords = $this->getKeywords('Outline');

        return $this->scanInputForKeywords($keywords, 'Outline');
    }
379
380
    /**
     * Scans the current line for a Scenario Outline Examples header.
     *
     * @return null|array
     */
    protected function scanExamples()
    {
        $keywords = $this->getKeywords('Examples');

        return $this->scanInputForKeywords($keywords, 'Examples');
    }
389
390
    /**
     * Scans the current line for a step and returns a Step token if found.
     *
     * Steps are only recognised once step searching has been turned on. The
     * token value is the trimmed keyword; the text after the keyword is stored
     * under 'text' and the normalised keyword type under 'keyword_type'.
     *
     * @return null|array
     */
    protected function scanStep()
    {
        if (!$this->allowSteps) {
            return null;
        }

        $pattern = '/^\s*(' . $this->getKeywords('Step') . ')([^\s].+)/u';
        if (!preg_match($pattern, $this->line, $parts)) {
            return null;
        }

        $stepKeyword = trim($parts[1]);

        // build the token before consuming so it carries this line's number
        $token = $this->takeToken('Step', $stepKeyword);
        $token['keyword_type'] = $this->getStepKeywordType($stepKeyword);
        $token['text'] = $parts[2];

        $this->consumeLine();
        // a step may be followed by a PyString or a table
        $this->allowMultilineArguments = true;

        return $token;
    }
416
417
    /**
     * Scans the current line for a PyString delimiter (""").
     *
     * A match toggles the in-PyString state and records the delimiter's
     * character offset in the line.
     *
     * @return null|array
     */
    protected function scanPyStringOp()
    {
        if (!$this->allowMultilineArguments) {
            return null;
        }

        $offset = mb_strpos($this->line, '"""', 0, 'utf8');
        if (false === $offset) {
            return null;
        }

        $this->inPyString = !$this->inPyString;
        // remembered so that content lines can have the same indentation stripped
        $this->pyStringSwallow = $offset;

        // build the token before consuming so it carries this line's number
        $token = $this->takeToken('PyStringOp');
        $this->consumeLine();

        return $token;
    }
440
441
    /**
     * Scans one line of PyString content while inside a PyString.
     *
     * The line is returned as a Text token. Up to pyStringSwallow leading
     * whitespace characters are removed from its value, where pyStringSwallow
     * is the offset of the opening delimiter.
     *
     * @return null|array
     */
    protected function scanPyStringContent()
    {
        if (!$this->inPyString) {
            return null;
        }

        // keep the raw line: takeToken() turns falsy values such as '' or '0'
        // into null, which would lose a content line that is exactly "0"
        $rawLine = $this->line;

        $token = $this->scanText();
        $value = null === $token['value'] ? $rawLine : $token['value'];

        // swallow leading spaces, at most as many as the delimiter was indented
        $token['value'] = preg_replace('/^\s{0,' . $this->pyStringSwallow . '}/u', '', $value);

        return $token;
    }
458
459
    /**
     * Scans the current line for a table row and returns a TableRow token if found.
     *
     * A row is a trimmed line that starts and ends with "|". Cells are split
     * on pipes not preceded by a backslash, "\|" is turned back into "|", and
     * each cell is trimmed. The cells are stored under 'columns'.
     *
     * @return null|array
     */
    protected function scanTableRow()
    {
        if (!$this->allowMultilineArguments) {
            return null;
        }

        $row = $this->getTrimmedLine();
        $looksLikeRow = isset($row[0]) && '|' === $row[0] && '|' === substr($row, -1);
        if (!$looksLikeRow) {
            return null;
        }

        // build the token before consuming so it carries this line's number
        $token = $this->takeToken('TableRow');

        // drop the outer pipes
        $inner = mb_substr($row, 1, mb_strlen($row, 'utf8') - 2, 'utf8');
        $cells = preg_split('/(?<!\\\)\|/u', $inner);

        $cleanCell = function ($cell) {
            return trim(str_replace('\\|', '|', $cell));
        };
        $token['columns'] = array_map($cleanCell, $cells);

        $this->consumeLine();

        return $token;
    }
486
487
    /**
     * Scans the current line for tags and returns a Tag token if found.
     *
     * A tag line is a trimmed line starting with "@". The rest of the line is
     * split on "@" and each piece is trimmed; the result is stored under 'tags'.
     *
     * @return null|array
     */
    protected function scanTags()
    {
        $trimmed = $this->getTrimmedLine();
        $startsWithAt = isset($trimmed[0]) && '@' === $trimmed[0];
        if (!$startsWithAt) {
            return null;
        }

        // build the token before consuming so it carries this line's number
        $token = $this->takeToken('Tag');

        // everything after the leading "@"
        $tagList = mb_substr($trimmed, 1, mb_strlen($trimmed, 'utf8') - 1, 'utf8');

        $tags = array();
        foreach (explode('@', $tagList) as $tag) {
            $tags[] = trim($tag);
        }
        $token['tags'] = $tags;

        $this->consumeLine();

        return $token;
    }
508
509
    /**
     * Scans the current line for a "# language: xx" specifier.
     *
     * Only attempted before the feature has started and outside a PyString,
     * and only when the line's first non-whitespace character is "#".
     *
     * @return null|array
     */
    protected function scanLanguage()
    {
        if ($this->featureStarted || $this->inPyString) {
            return null;
        }

        $hashPosition = mb_strpos(ltrim($this->line), '#', 0, 'utf8');
        if (0 !== $hashPosition) {
            return null;
        }

        return $this->scanInput('/^\s*\#\s*language:\s*([\w_\-]+)\s*$/', 'Language');
    }
530
531
    /**
     * Scans the current line for a comment and returns a Comment token if found.
     *
     * A comment is a trimmed line starting with "#". Comments are not
     * recognised inside a PyString. The token value is the trimmed line.
     *
     * @return null|array
     */
    protected function scanComment()
    {
        if ($this->inPyString) {
            return null;
        }

        $trimmed = $this->getTrimmedLine();
        if (0 === mb_strpos($trimmed, '#', 0, 'utf8')) {
            // build the token before consuming so it carries this line's number
            $token = $this->takeToken('Comment', $trimmed);
            $this->consumeLine();

            return $token;
        }

        return null;
    }
552
553
    /**
     * Scans the current line for a blank line and returns a Newline token if found.
     *
     * The token is built with the character length of the untrimmed line as
     * its value.
     *
     * @return null|array
     */
    protected function scanNewline()
    {
        if ('' === $this->getTrimmedLine()) {
            $length = mb_strlen($this->line, 'utf8');

            // build the token before consuming so it carries this line's number
            $token = $this->takeToken('Newline', $length);
            $this->consumeLine();

            return $token;
        }

        return null;
    }
569
570
    /**
     * Turns the current line, untrimmed, into a Text token and consumes it.
     *
     * This scanner has no precondition, so it matches any line.
     *
     * @return null|array
     */
    protected function scanText()
    {
        // build the token before consuming so it carries this line's number
        $textToken = $this->takeToken('Text', $this->line);
        $this->consumeLine();

        return $textToken;
    }
582
583
    /**
     * Returns step type keyword (Given, When, Then, etc.).
     *
     * The native keyword is looked up in the Given, When, Then, And and But
     * keyword lists of the keywords holder, in that order. A keyword also
     * matches when the list contains it with a trailing "<". The lists are
     * built on first use and cached.
     *
     * @param string $native Step keyword in provided language
     *
     * @return string Matching type, or 'Given' when no list contains the keyword
     */
    private function getStepKeywordType($native)
    {
        // Consider "*" as a AND keyword so that it is normalized to the previous step type
        if ('*' === $native) {
            return 'And';
        }

        if (empty($this->stepKeywordTypesCache)) {
            $this->stepKeywordTypesCache = array(
                'Given' => explode('|', $this->keywords->getGivenKeywords()),
                'When' => explode('|', $this->keywords->getWhenKeywords()),
                'Then' => explode('|', $this->keywords->getThenKeywords()),
                'And' => explode('|', $this->keywords->getAndKeywords()),
                'But' => explode('|', $this->keywords->getButKeywords())
            );
        }

        // form in which the keyword is listed when it carries a trailing "<"
        $attached = $native . '<';

        foreach ($this->stepKeywordTypesCache as $type => $keywords) {
            // strict comparison: keywords are strings and must match exactly,
            // without PHP's loose numeric-string juggling
            if (in_array($native, $keywords, true) || in_array($attached, $keywords, true)) {
                return $type;
            }
        }

        return 'Given';
    }
614
}
615