Completed
Pull Request — master (#150)
by Matt
02:43
created

Lexer::scanRule()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 0
crap 1
1
<?php
2
3
/*
4
 * This file is part of the Behat Gherkin.
5
 * (c) Konstantin Kudryashov <[email protected]>
6
 *
7
 * For the full copyright and license information, please view the LICENSE
8
 * file that was distributed with this source code.
9
 */
10
11
namespace Behat\Gherkin;
12
13
use Behat\Gherkin\Exception\LexerException;
14
use Behat\Gherkin\Keywords\KeywordsInterface;
15
16
/**
17
 * Gherkin lexer.
18
 *
19
 * @author Konstantin Kudryashov <[email protected]>
20
 */
21
class Lexer
22
{
23
    // Language name passed to the last analyse() call; returned by getLanguage().
    private $language;
24
    // Input split into individual lines on "\n".
    private $lines;
25
    // Number of entries in $lines.
    private $linesCount;
26
    // Raw text of the line currently being scanned.
    private $line;
27
    // Cached trim() of $line; null until getTrimmedLine() computes it.
    private $trimmedLine;
28
    // 1-based number of the current line; stamped onto every token.
    private $lineNumber;
29
    // Set to true by consumeLine() once the last line has been consumed.
    private $eos;
30
    // Keywords holder injected through the constructor.
    private $keywords;
31
    // Keyword strings per token type, filled lazily by getKeywords().
    private $keywordsCache = array();
32
    // Step type => list of native keywords, filled lazily by getStepKeywordType().
    private $stepKeywordTypesCache = array();
33
    // Queue of tokens added by deferToken(); read first-in-first-out.
    private $deferredObjects = array();
34
    // Number of tokens currently waiting in $deferredObjects.
    private $deferredObjectsCount = 0;
35
    // Token read ahead by predictToken(); null when nothing is stashed.
    private $stashedToken;
36
    // Toggled by every PyString delimiter line; true while inside a PyString.
    private $inPyString = false;
37
    // Offset of the last PyString delimiter; maximum leading whitespace stripped from PyString lines.
    private $pyStringSwallow = 0;
38
    // True once a Feature token has been scanned; disables language scanning.
    private $featureStarted = false;
39
    // Enables PyString delimiter and table row scanning.
    private $allowMultilineArguments = false;
40
    // Enables step scanning; switched on by Scenario, Background and Outline tokens.
    private $allowSteps = false;
41
42
    /**
     * Creates a lexer bound to the given keywords holder.
     *
     * @param KeywordsInterface $keywords Keywords holder
     */
    public function __construct(KeywordsInterface $keywords)
    {
        $this->keywords = $keywords;
    }
51
52
    /**
     * Loads new input into the lexer and resets all scanning state.
     *
     * @param string $input    Input string
     * @param string $language Language name
     *
     * @throws Exception\LexerException When the input is not detected as UTF-8
     */
    public function analyse($input, $language = 'en')
    {
        // refuse input that is not valid UTF-8 before any state is touched
        $encoding = mb_detect_encoding($input, 'UTF-8', true);
        if ('UTF-8' !== $encoding) {
            throw new LexerException('Feature file is not in UTF8 encoding');
        }

        // normalise "\r\n" and lone "\r" line endings to "\n", then split
        $lines = explode("\n", strtr($input, array("\r\n" => "\n", "\r" => "\n")));

        // position on the first line
        $this->lines = $lines;
        $this->linesCount = count($lines);
        $this->line = $lines[0];
        $this->lineNumber = 1;
        $this->trimmedLine = null;
        $this->eos = false;

        // forget queued and look-ahead tokens and any open PyString
        $this->deferredObjects = array();
        $this->deferredObjectsCount = 0;
        $this->stashedToken = null;
        $this->inPyString = false;
        $this->pyStringSwallow = 0;

        // scanning switches start in their "nothing seen yet" position
        $this->featureStarted = false;
        $this->allowMultilineArguments = false;
        $this->allowSteps = false;

        // switch the keywords holder to the new language and drop cached keywords
        $this->language = $language;
        $this->keywords->setLanguage($language);
        $this->keywordsCache = array();
        $this->stepKeywordTypesCache = array();
    }
90
91
    /**
     * Returns the language name given to the last analyse() call.
     *
     * @return string
     */
    public function getLanguage()
    {
        return $this->language;
    }
100
101
    /**
     * Consumes and returns the stashed token if there is one, otherwise the next token.
     *
     * @return array
     */
    public function getAdvancedToken()
    {
        $token = $this->getStashedToken();
        if (!$token) {
            $token = $this->getNextToken();
        }

        return $token;
    }
110
111
    /**
     * Queues a token so that it is returned before any further input is scanned.
     *
     * The token is flagged as deferred before it is stored.
     *
     * @param array $token Token to defer
     */
    public function deferToken(array $token)
    {
        $token['deferred'] = true;

        $this->deferredObjects[] = $token;
        $this->deferredObjectsCount += 1;
    }
122
123
    /**
     * Peeks at the upcoming token without consuming it.
     *
     * The token is stashed, so repeated calls return the same token until
     * the stash is taken by getAdvancedToken().
     *
     * @return array
     */
    public function predictToken()
    {
        if (null !== $this->stashedToken) {
            return $this->stashedToken;
        }

        return $this->stashedToken = $this->getNextToken();
    }
136
137
    /**
     * Constructs token with specified parameters.
     *
     * The token is stamped with the current line number and marked as not
     * deferred. Empty values ('', 0, null, ...) are normalised to null, but
     * the literal string "0" is preserved because it is real content.
     *
     * @param string $type  Token type
     * @param mixed  $value Token value
     *
     * @return array
     */
    public function takeToken($type, $value = null)
    {
        // "0" is falsy in PHP, so a bare `?:` would silently discard it
        if ('0' !== $value) {
            $value = $value ?: null;
        }

        return array(
            'type'     => $type,
            'line'     => $this->lineNumber,
            'value'    => $value,
            'deferred' => false
        );
    }
154
155
    /**
     * Moves on to the next input line and increments the line counter.
     *
     * When no line is left, the end-of-stream flag is raised and the
     * current line is left untouched.
     */
    protected function consumeLine()
    {
        // the old 1-based line number is the 0-based index of the next line
        $nextIndex = $this->lineNumber;
        $this->lineNumber = $nextIndex + 1;

        if ($nextIndex === $this->linesCount) {
            $this->eos = true;

            return;
        }

        $this->line = $this->lines[$nextIndex];
        // invalidate the trim cache for the new line
        $this->trimmedLine = null;
    }
171
172
    /**
     * Returns the current line with surrounding whitespace removed.
     *
     * The result is cached until the next line is consumed.
     *
     * @return string
     */
    protected function getTrimmedLine()
    {
        if (null === $this->trimmedLine) {
            $this->trimmedLine = trim($this->line);
        }

        return $this->trimmedLine;
    }
181
182
    /**
     * Takes the stashed token, leaving the stash empty.
     *
     * @return array|null Stashed token, or null when nothing was stashed
     */
    protected function getStashedToken()
    {
        $token = $this->stashedToken;
        $this->stashedToken = null;

        return $token;
    }
194
195
    /**
     * Takes the oldest deferred token from the queue.
     *
     * @return array|null Deferred token, or null when the queue is empty
     */
    protected function getDeferredToken()
    {
        if ($this->deferredObjectsCount) {
            --$this->deferredObjectsCount;

            return array_shift($this->deferredObjects);
        }

        return null;
    }
210
211
    /**
     * Returns next token from input.
     *
     * Deferred tokens are served first. Otherwise the scanners are tried in a
     * fixed order and the first one that produces a token wins; scanText()
     * comes last and its result is returned whatever it is.
     *
     * @return array
     */
    protected function getNextToken()
    {
        $token = $this->getDeferredToken();
        if ($token) {
            return $token;
        }

        // the order is significant: earlier scanners take precedence
        $scanners = array(
            'scanEOS',
            'scanLanguage',
            'scanComment',
            'scanPyStringOp',
            'scanPyStringContent',
            'scanRule',
            'scanStep',
            'scanScenario',
            'scanBackground',
            'scanOutline',
            'scanExamples',
            'scanFeature',
            'scanTags',
            'scanTableRow',
            'scanNewline',
            'scanText',
        );

        foreach ($scanners as $scanner) {
            $token = $this->$scanner();
            if ($token) {
                return $token;
            }
        }

        // nothing truthy was produced: hand back the result of the last scanner
        return $token;
    }
236
237
    /**
     * Matches the current line against a regex and turns a match into a token.
     *
     * The first capture group becomes the token value; the line is consumed
     * only when the regex matches.
     *
     * @param string $regex Regular expression
     * @param string $type  Expected token type
     *
     * @return null|array
     */
    protected function scanInput($regex, $type)
    {
        if (preg_match($regex, $this->line, $matches)) {
            $token = $this->takeToken($type, $matches[1]);
            $this->consumeLine();

            return $token;
        }

        return null;
    }
256
257
    /**
     * Scans the current line for "<keyword>: <title>" and builds a token from it.
     *
     * The token carries the title as its value, plus the matched keyword and
     * the indentation width. Depending on the token type the language,
     * multiline-argument and step scanning switches are updated.
     *
     * @param string $keywords Keywords (separated with |)
     * @param string $type     Expected token type
     *
     * @return null|array
     */
    protected function scanInputForKeywords($keywords, $type)
    {
        $pattern = '/^(\s*)(' . $keywords . '):\s*(.*)/u';
        if (!preg_match($pattern, $this->line, $matches)) {
            return null;
        }

        list(, $indent, $keyword, $title) = $matches;

        $token = $this->takeToken($type, $title);
        $token['keyword'] = $keyword;
        $token['indent'] = mb_strlen($indent, 'utf8');

        $this->consumeLine();

        $isFeature = 'Feature' === $type;
        $isScenarioLike = 'Scenario' === $type || 'Outline' === $type;

        // a feature header ends the region where a language line is looked for
        if ($isFeature) {
            $this->featureStarted = true;
        }

        // PyStrings and tables are not expected right after these headers,
        // but a table is expected after Examples
        if ($isFeature || $isScenarioLike) {
            $this->allowMultilineArguments = false;
        } elseif ('Examples' === $type) {
            $this->allowMultilineArguments = true;
        }

        // steps may follow any of these headers
        if ($isScenarioLike || 'Background' === $type) {
            $this->allowSteps = true;
        }

        return $token;
    }
296
297
    /**
     * Produces an EOS token once the end of input has been reached.
     *
     * @return null|array
     */
    protected function scanEOS()
    {
        return $this->eos ? $this->takeToken('EOS') : null;
    }
310
311
    /**
     * Returns the keywords string for the given type, cached per type.
     *
     * The keywords come from the "get<Type>Keywords" method of the keywords
     * holder. Step keywords are additionally converted into regex fragments:
     * a keyword containing "<" loses its last character and may be followed
     * by optional whitespace, any other keyword must be followed by whitespace.
     *
     * @param string $type Keyword type
     *
     * @return string
     */
    protected function getKeywords($type)
    {
        if (isset($this->keywordsCache[$type])) {
            return $this->keywordsCache[$type];
        }

        $getter = 'get' . $type . 'Keywords';
        $keywords = $this->keywords->$getter();

        if ('Step' === $type) {
            $patterns = array();
            foreach (explode('|', $keywords) as $keyword) {
                if (false === mb_strpos($keyword, '<', 0, 'utf8')) {
                    $patterns[] = preg_quote($keyword, '/') . '\s+';
                } else {
                    $patterns[] = preg_quote(mb_substr($keyword, 0, -1, 'utf8'), '/') . '\s*';
                }
            }

            $keywords = implode('|', $patterns);
        }

        return $this->keywordsCache[$type] = $keywords;
    }
340
341
    /**
     * Tries to read a Feature header from the current line.
     *
     * @return null|array
     */
    protected function scanFeature()
    {
        $keywords = $this->getKeywords('Feature');

        return $this->scanInputForKeywords($keywords, 'Feature');
    }
350
351
    /**
     * Tries to read a Background header from the current line.
     *
     * @return null|array
     */
    protected function scanBackground()
    {
        $keywords = $this->getKeywords('Background');

        return $this->scanInputForKeywords($keywords, 'Background');
    }
360
361
    /**
     * Tries to read a Rule header from the current line.
     *
     * @return null|array
     */
    protected function scanRule()
    {
        $keywords = $this->getKeywords('Rule');

        return $this->scanInputForKeywords($keywords, 'Rule');
    }
370
371
    /**
     * Tries to read a Scenario header from the current line.
     *
     * @return null|array
     */
    protected function scanScenario()
    {
        $keywords = $this->getKeywords('Scenario');

        return $this->scanInputForKeywords($keywords, 'Scenario');
    }
380
381
    /**
     * Tries to read a Scenario Outline header from the current line.
     *
     * @return null|array
     */
    protected function scanOutline()
    {
        $keywords = $this->getKeywords('Outline');

        return $this->scanInputForKeywords($keywords, 'Outline');
    }
390
391
    /**
     * Tries to read a Scenario Outline Examples header from the current line.
     *
     * @return null|array
     */
    protected function scanExamples()
    {
        $keywords = $this->getKeywords('Examples');

        return $this->scanInputForKeywords($keywords, 'Examples');
    }
400
401
    /**
     * Scans Step from input & returns it if found.
     *
     * Steps are only looked for once step scanning has been switched on.
     * The token value is the trimmed keyword; the step text and the keyword
     * type are stored under 'text' and 'keyword_type'. A scanned step
     * switches multiline argument (PyString/table) scanning on.
     *
     * @return null|array
     */
    protected function scanStep()
    {
        if (!$this->allowSteps) {
            return null;
        }

        $keywords = $this->getKeywords('Step');
        // the text must start with a non-whitespace character; ".*" (not ".+")
        // so that a one-character step text is still recognised as a step
        if (!preg_match('/^\s*(' . $keywords . ')([^\s].*)/u', $this->line, $matches)) {
            return null;
        }

        $keyword = trim($matches[1]);
        $token = $this->takeToken('Step', $keyword);
        $token['keyword_type'] = $this->getStepKeywordType($keyword);
        $token['text'] = $matches[2];

        $this->consumeLine();
        $this->allowMultilineArguments = true;

        return $token;
    }
427
428
    /**
     * Scans a PyString delimiter (""") from input & returns it if found.
     *
     * Only active while multiline arguments are allowed. The delimiter is
     * searched for anywhere in the line; each hit toggles the "inside
     * PyString" state and records the delimiter offset, which is later used
     * to strip indentation from the PyString content.
     *
     * @return null|array
     */
    protected function scanPyStringOp()
    {
        if (!$this->allowMultilineArguments) {
            return null;
        }

        $offset = mb_strpos($this->line, '"""', 0, 'utf8');
        if (false === $offset) {
            return null;
        }

        $this->inPyString = !$this->inPyString;
        $token = $this->takeToken('PyStringOp');
        $this->pyStringSwallow = $offset;

        $this->consumeLine();

        return $token;
    }
451
452
    /**
     * Scans PyString content.
     *
     * While inside a PyString every line is taken as a Text token. Up to
     * $pyStringSwallow leading whitespace characters (the offset of the
     * last PyString delimiter) are removed from the token value.
     *
     * @return null|array
     */
    protected function scanPyStringContent()
    {
        if (!$this->inPyString) {
            return null;
        }

        $token = $this->scanText();
        // swallow leading indentation, at most as wide as the delimiter offset;
        // the cast keeps the null value of an empty line out of preg_replace()
        $token['value'] = preg_replace('/^\s{0,' . $this->pyStringSwallow . '}/u', '', (string) $token['value']);

        return $token;
    }
469
470
    /**
     * Scans Table Row from input & returns it if found.
     *
     * Only active while multiline arguments are allowed. A row is a trimmed
     * line that both starts and ends with "|". Cells are split on pipes that
     * are not preceded by a backslash; escaped pipes are unescaped and every
     * cell is trimmed. The cells are stored under 'columns'.
     *
     * @return null|array
     */
    protected function scanTableRow()
    {
        if (!$this->allowMultilineArguments) {
            return null;
        }

        $line = $this->getTrimmedLine();
        $isRow = isset($line[0]) && '|' === $line[0] && '|' === substr($line, -1);
        if (!$isRow) {
            return null;
        }

        $token = $this->takeToken('TableRow');

        // drop the outer pipes before splitting into cells
        $inner = mb_substr($line, 1, mb_strlen($line, 'utf8') - 2, 'utf8');
        $unescape = function ($cell) {
            return trim(str_replace('\\|', '|', $cell));
        };
        $token['columns'] = array_map($unescape, preg_split('/(?<!\\\)\|/u', $inner));

        $this->consumeLine();

        return $token;
    }
497
498
    /**
     * Scans Tags from input & returns them if found.
     *
     * A tag line is a trimmed line starting with "@". The remainder is split
     * on "@" and every piece is trimmed; the result is stored under 'tags'.
     *
     * @return null|array
     */
    protected function scanTags()
    {
        $line = $this->getTrimmedLine();
        $startsWithAt = isset($line[0]) && '@' === $line[0];
        if (!$startsWithAt) {
            return null;
        }

        $token = $this->takeToken('Tag');

        // everything after the leading "@"
        $rest = mb_substr($line, 1, mb_strlen($line, 'utf8') - 1, 'utf8');
        $token['tags'] = array_map('trim', explode('@', $rest));

        $this->consumeLine();

        return $token;
    }
519
520
    /**
     * Scans Language specifier from input & returns it if found.
     *
     * The specifier is only looked for before the feature has started and
     * outside of PyStrings, on lines whose first non-whitespace character
     * is "#".
     *
     * @return null|array
     */
    protected function scanLanguage()
    {
        if ($this->featureStarted || $this->inPyString) {
            return null;
        }

        // cheap pre-check so the regex only runs on comment-like lines
        $startsWithHash = 0 === mb_strpos(ltrim($this->line), '#', 0, 'utf8');
        if (!$startsWithHash) {
            return null;
        }

        return $this->scanInput('/^\s*\#\s*language:\s*([\w_\-]+)\s*$/', 'Language');
    }
541
542
    /**
     * Scans Comment from input & returns it if found.
     *
     * Outside of PyStrings, a line whose trimmed form starts with "#" is a
     * comment; the trimmed line becomes the token value.
     *
     * @return null|array
     */
    protected function scanComment()
    {
        if (!$this->inPyString) {
            $line = $this->getTrimmedLine();

            if (0 === mb_strpos($line, '#', 0, 'utf8')) {
                $token = $this->takeToken('Comment', $line);
                $this->consumeLine();

                return $token;
            }
        }

        return null;
    }
563
564
    /**
     * Scans Newline from input & returns it if found.
     *
     * A line that is empty after trimming yields a Newline token built from
     * the character length of the raw line.
     *
     * @return null|array
     */
    protected function scanNewline()
    {
        if ('' === $this->getTrimmedLine()) {
            $token = $this->takeToken('Newline', mb_strlen($this->line, 'utf8'));
            $this->consumeLine();

            return $token;
        }

        return null;
    }
580
581
    /**
     * Takes the current raw line as a Text token and consumes it.
     *
     * @return null|array
     */
    protected function scanText()
    {
        $text = $this->takeToken('Text', $this->line);

        $this->consumeLine();

        return $text;
    }
593
594
    /**
     * Returns step type keyword (Given, When, Then, etc.).
     *
     * The native keyword is looked up in the Given/When/Then/And/But keyword
     * lists of the keywords holder, both as-is and with a trailing "<".
     * Unknown keywords fall back to "Given".
     *
     * @param string $native Step keyword in provided language
     *
     * @return string
     */
    private function getStepKeywordType($native)
    {
        // Consider "*" as a AND keyword so that it is normalized to the previous step type
        if ('*' === $native) {
            return 'And';
        }

        // the lists are built on first use
        if (empty($this->stepKeywordTypesCache)) {
            $this->stepKeywordTypesCache = array(
                'Given' => explode('|', $this->keywords->getGivenKeywords()),
                'When' => explode('|', $this->keywords->getWhenKeywords()),
                'Then' => explode('|', $this->keywords->getThenKeywords()),
                'And' => explode('|', $this->keywords->getAndKeywords()),
                'But' => explode('|', $this->keywords->getButKeywords())
            );
        }

        foreach ($this->stepKeywordTypesCache as $type => $keywords) {
            // strict comparison: keywords are plain strings and must match exactly
            if (in_array($native, $keywords, true) || in_array($native . '<', $keywords, true)) {
                return $type;
            }
        }

        return 'Given';
    }
625
}
626