BufferedTokenStream::getIndex()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 3
ccs 2
cts 2
cp 1
crap 1
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Antlr\Antlr4\Runtime;
6
7
use Antlr\Antlr4\Runtime\Utils\Set;
8
9
/**
10
 * This implementation of {@see TokenStream} loads tokens from a
11
 * {@see TokenSource} on-demand, and places the tokens in a buffer to provide
12
 * access to any previous token by index.
13
 *
14
 * This token stream ignores the value of {@see Token::getChannel()}. If your
15
 * parser requires the token stream filter tokens to only those on a particular
16
 * channel, such as {@see Token::DEFAULT_CHANNEL} or
17
 * {@see Token::HIDDEN_CHANNEL}, use a filtering token stream such as
18
 * {@see CommonTokenStream}.
19
 */
20
class BufferedTokenStream implements TokenStream
21
{
22
    /**
23
     * The {@see TokenSource} from which tokens for this stream are fetched.
24
     *
25
     * @var TokenSource
26
     */
27
    protected $tokenSource;
28
29
    /**
30
     * A collection of all tokens fetched from the token source. The list is
31
     * considered a complete view of the input once
32
     * {@see BufferedTokenStream::fetchedEOF()} is set to `true`.
33
     *
34
     * @var array<Token>
35
     */
36
    protected $tokens = [];
37
38
    /**
39
     * The index into {@see BufferedTokenStream::tokens()} of the current token
40
     * (next token to {@see BufferedTokenStream::consume()}).
41
     * {@see BufferedTokenStream::$tokens}`[{@see BufferedTokenStream::$index}]`
42
     * should be {@see BufferedTokenStream::LT(1)}.
43
     *
44
     * This field is set to -1 when the stream is first constructed or when
45
     * {@see BufferedTokenStream::setTokenSource()} is called, indicating that
46
     * the first token has not yet been fetched from the token source. For
47
     * additional information, see the documentation of {@see IntStream} for
48
     * a description of Initializing Methods.
49
     *
50
     * @var int
51
     */
52
    protected $index = -1;
53
54
    /**
55
     * Indicates whether the {@see Token::EOF} token has been fetched from
56
     * {@see BufferedTokenStream::tokenSource()} and added to
57
     * {@see BufferedTokenStream::tokens()}. This field improves performance
58
     * for the following cases:
59
     *
60
     * - {@see BufferedTokenStream::consume()}: The lookahead check in
61
     *    {@see BufferedTokenStream::consume()} to prevent consuming the
62
     *    EOF symbol is optimized by checking the values of
63
     *    {@see BufferedTokenStream::fetchedEOF()} and
64
     *    {@see BufferedTokenStream::$index} instead of calling
65
     *    {@see BufferedTokenStream::LA()}.
66
     * - {@see BufferedTokenStream::fetch()}: The check to prevent adding multiple
67
     *    EOF symbols into {@see BufferedTokenStream::tokens()} is trivial with
68
     *    this field.
69
     *
70
     * @var bool
71
     */
72
    protected $fetchedEOF = false;
73
74 7
    /**
     * Create a stream backed by the given token source.
     *
     * Only the source is stored here; no token is requested from it yet.
     *
     * @param TokenSource $tokenSource The source tokens will be fetched from.
     */
    public function __construct(TokenSource $tokenSource)
    {
        $this->tokenSource = $tokenSource;
    }
78
79
    /**
     * @return TokenSource The source this stream currently fetches its tokens from.
     */
    public function getTokenSource() : TokenSource
    {
        return $this->tokenSource;
    }
83
84 7
    /**
     * @return int The current position in the token buffer, or -1 while the
     *             stream has not been initialized.
     */
    public function getIndex() : int
    {
        return $this->index;
    }
88
89 4
    /**
     * Marks are not tracked by this stream, so a constant marker is handed out.
     *
     * @return int Always 0.
     */
    public function mark() : int
    {
        return 0;
    }
93
94 4
    /**
     * Release a marker previously returned by {@see BufferedTokenStream::mark()}.
     *
     * This is a no-op: the marker is ignored because nothing was allocated for it.
     *
     * @param int $marker The marker to release (unused).
     */
    public function release(int $marker) : void
    {
        // Intentionally empty: there are no resources to release.
    }
98
99 4
    /**
     * Move the current position to the given token index.
     *
     * The stream is initialized first, and the requested index is passed
     * through {@see BufferedTokenStream::adjustSeekIndex()} before it is stored.
     *
     * @param int $index The requested token index.
     */
    public function seek(int $index) : void
    {
        $this->lazyInit();

        // Resolve the target before assigning, so the position stays untouched
        // if the adjustment throws.
        $target = $this->adjustSeekIndex($index);

        $this->index = $target;
    }
105
106
    /**
     * @return int The number of tokens currently held in the buffer.
     */
    public function getLength() : int
    {
        return \count($this->tokens);
    }
110
111 5
    /**
     * Advance the current position by one token.
     *
     * @throws \InvalidArgumentException If the current token is EOF.
     */
    public function consume() : void
    {
        $skipEofCheck = false;

        if ($this->index >= 0) {
            // Once EOF has been fetched it is the last buffered token, so only
            // positions strictly before it are known not to be EOF. Without a
            // buffered EOF, every buffered position is safe.
            $safeLimit = \count($this->tokens) - ($this->fetchedEOF ? 1 : 0);

            $skipEofCheck = $this->index < $safeLimit;
        }

        if (!$skipEofCheck && $this->LA(1) === Token::EOF) {
            throw new \InvalidArgumentException('Cannot consume EOF.');
        }

        // Stay put when no token could be made available at the next position.
        if (!$this->sync($this->index + 1)) {
            return;
        }

        $this->index = $this->adjustSeekIndex($this->index + 1);
    }
134
135
    /**
     * Ensure the buffer holds a token at index `i`, fetching more tokens from
     * the source when it does not.
     *
     * @param int $i The buffer index that must be populated.
     *
     * @return bool `true` if a token is located at index `i`,
     *              otherwise `false`.
     *
     * @see BufferedTokenStream::get()
     */
    public function sync(int $i) : bool
    {
        // Number of tokens still missing to reach index $i.
        $missing = $i + 1 - \count($this->tokens);

        if ($missing <= 0) {
            return true;
        }

        return $this->fetch($missing) >= $missing;
    }
155
156 7
    /**
     * Pull up to `n` further tokens from the token source into the buffer.
     *
     * Fetching stops as soon as an EOF token has been buffered; that token is
     * stored as well and the stream is flagged as having fetched EOF.
     *
     * @param int $n The maximum number of tokens to fetch.
     *
     * @return int The number of tokens added to the buffer; 0 if EOF had
     *             already been fetched before the call.
     */
    public function fetch(int $n) : int
    {
        if ($this->fetchedEOF) {
            return 0;
        }

        for ($i = 0; $i < $n; $i++) {
            $token = $this->tokenSource->nextToken();

            // Only writable tokens can record their buffer position. Any other
            // token is buffered as is instead of failing on a missing method.
            if ($token instanceof WritableToken) {
                $token->setTokenIndex(\count($this->tokens));
            }

            $this->tokens[] = $token;

            if ($token->getType() === Token::EOF) {
                $this->fetchedEOF = true;

                return $i + 1;
            }
        }

        return $n;
    }
178
179 4
    /**
     * Return the buffered token at the given index.
     *
     * The stream is initialized before the bounds check, so the first token is
     * reachable on a stream that has not been read from yet.
     *
     * @param int $index The index into the token buffer.
     *
     * @return Token The token stored at that index.
     *
     * @throws \OutOfBoundsException If the index lies outside the buffered tokens.
     */
    public function get(int $index) : Token
    {
        // Must run before the range check: on a fresh stream the buffer is
        // still empty and every index would be rejected otherwise.
        $this->lazyInit();

        $count = \count($this->tokens);

        if ($index < 0 || $index >= $count) {
            throw new \OutOfBoundsException(\sprintf(
                'Token index %d out of range 0..%d.',
                $index,
                $count - 1 // last valid index, not the buffer size
            ));
        }

        return $this->tokens[$index];
    }
195
196 7
    /**
     * Return the type of the token at lookahead offset `i`.
     *
     * @param int $i The lookahead offset, as accepted by {@see BufferedTokenStream::LT()}.
     *
     * @return int The token type, or {@see Token::INVALID_TYPE} when there is
     *             no token at that offset.
     */
    public function LA(int $i) : int
    {
        $token = $this->LT($i);

        if ($token === null) {
            return Token::INVALID_TYPE;
        }

        return $token->getType();
    }
202
203
    /**
     * Look backwards `k` tokens from the current position.
     *
     * @param int $k How many tokens to step back.
     *
     * @return Token|null The token `k` positions before the current one, or
     *                    `null` if that position lies before the buffer start.
     */
    protected function LB(int $k) : ?Token
    {
        $position = $this->index - $k;

        return $position < 0 ? null : $this->tokens[$position];
    }
211
212
    /**
     * Return the token at lookahead offset `k` relative to the current position.
     *
     * `k = 1` is the current token, and negative offsets look backwards via
     * {@see BufferedTokenStream::LB()}.
     *
     * @param int $k The lookahead offset.
     *
     * @return Token|null `null` for `k = 0` or when looking back before the
     *                    start of the buffer; otherwise the requested token, or
     *                    the last buffered token when the offset is past the
     *                    end of the buffer.
     */
    public function LT(int $k) : ?Token
    {
        $this->lazyInit();

        if ($k === 0) {
            return null;
        }

        if ($k < 0) {
            return $this->LB(-$k);
        }

        $target = $this->index + $k - 1;

        $this->sync($target);

        $buffered = \count($this->tokens);

        // Past the end of the buffer: fall back to the last token, which must
        // be EOF at that point.
        $slot = $target < $buffered ? $target : $buffered - 1;

        return $this->tokens[$slot];
    }
236
237
    /**
     * Allows derived classes to modify the behavior of operations which change
     * the current stream position by adjusting the target token index of a
     * seek operation. The default implementation simply returns `i`. If an
     * exception is thrown in this method, the current stream index should not
     * be changed.
     *
     * For example, {@see CommonTokenStream} overrides this method to ensure
     * that the seek target is always an on-channel token.
     *
     * @param int $i The target token index.
     *
     * @return int The adjusted target token index.
     */
    public function adjustSeekIndex(int $i) : int
    {
        return $i;
    }
255
256 7
    /**
     * Initialize the stream on first use.
     *
     * Does nothing unless the current index still has its initial value of -1.
     */
    protected function lazyInit() : void
    {
        if ($this->index !== -1) {
            return;
        }

        $this->setup();
    }
262
263 7
    /**
     * Buffer the first token and position the stream on it.
     *
     * The start index is passed through
     * {@see BufferedTokenStream::adjustSeekIndex()} before it is stored.
     */
    protected function setup() : void
    {
        $this->sync(0);

        $first = $this->adjustSeekIndex(0);

        $this->index = $first;
    }
269
270
    /**
     * Reset this token stream by setting its token source.
     *
     * The buffer is emptied, the EOF flag is cleared and the position returns
     * to its uninitialized value of -1.
     *
     * @param TokenSource $tokenSource The new source to fetch tokens from.
     */
    public function setTokenSource(TokenSource $tokenSource) : void
    {
        // Drop every piece of state derived from the previous source.
        $this->tokens = [];
        $this->fetchedEOF = false;
        $this->index = -1;

        $this->tokenSource = $tokenSource;
    }
280
281
    /**
     * Return the tokens buffered so far. No additional tokens are fetched.
     *
     * @return array<Token>
     */
    public function getAllTokens() : array
    {
        return $this->tokens;
    }
288
289
    /**
     * Get all buffered tokens from `start..stop`, inclusively.
     *
     * The stream is initialized first. A `stop` beyond the buffer is clamped
     * to the last buffered index. Collection ends at the first EOF token,
     * which is never part of the result.
     *
     * @param int      $start Index of the first token to consider.
     * @param int      $stop  Index of the last token to consider (inclusive).
     * @param Set|null $types When given, only tokens whose type is contained
     *                        in this set are returned.
     *
     * @return array<Token>|null The matching tokens, or `null` if either
     *                           bound is negative.
     */
    public function getTokens(int $start, int $stop, ?Set $types = null) : ?array
    {
        if ($start < 0 || $stop < 0) {
            return null;
        }

        $this->lazyInit();

        $lastIndex = \count($this->tokens) - 1;

        if ($stop > $lastIndex) {
            $stop = $lastIndex;
        }

        $subset = [];

        // `<=` so that the token at `stop` itself is included, as documented.
        for ($i = $start; $i <= $stop; $i++) {
            $token = $this->tokens[$i];
            $type = $token->getType();

            if ($type === Token::EOF) {
                break;
            }

            if ($types === null || $types->contains($type)) {
                $subset[] = $token;
            }
        }

        return $subset;
    }
321
322
    /**
     * Given a starting index, return the index of the next token on channel.
     * Return `i` if `tokens[i]` is on channel. Return the index of the EOF
     * token if there are no tokens on channel between `i` and EOF.
     *
     * @param int $i       The index to start searching from.
     * @param int $channel The channel to look for.
     *
     * @return int The index of the first matching token or of the EOF token;
     *             the last buffered index when `i` is past the end of the buffer.
     */
    protected function nextTokenOnChannel(int $i, int $channel) : int
    {
        $this->sync($i);

        if ($i >= \count($this->tokens)) {
            return $this->getLength() - 1;
        }

        while (true) {
            $token = $this->tokens[$i];

            // Stop on a channel match, or on EOF since nothing follows it.
            if ($token->getChannel() === $channel || $token->getType() === Token::EOF) {
                return $i;
            }

            $i++;

            $this->sync($i);
        }
    }
350
351
    /**
     * Given a starting index, return the index of the previous token on channel.
     * Return `i` if `tokens[i]` is on channel. Return -1 if there are no tokens
     * on channel between `i` and 0.
     *
     * If `i` specifies an index at or after the EOF token, the EOF token
     * index is returned. This is due to the fact that the EOF token is treated
     * as though it were on every channel.
     *
     * @param int $i       The index to start searching backwards from.
     * @param int $channel The channel to look for.
     *
     * @return int The index of the matching token, the EOF token index, or a
     *             negative value when nothing matched.
     */
    protected function previousTokenOnChannel(int $i, int $channel) : int
    {
        // Make sure index $i is buffered (or EOF has been reached) before
        // reading from the buffer.
        $this->sync($i);

        $count = \count($this->tokens);

        if ($i >= $count) {
            // Past the end of the input: the EOF token is on every channel.
            return $count - 1;
        }

        while ($i >= 0) {
            $token = $this->tokens[$i];

            if ($token->getType() === Token::EOF || $token->getChannel() === $channel) {
                return $i;
            }

            $i--;
        }

        return $i;
    }
368
369
    /**
     * Collect all tokens on the specified channel to the right of the given
     * token index, up until a token on DEFAULT_TOKEN_CHANNEL or EOF is seen.
     * If channel is -1, find any non default channel token.
     *
     * @param int $tokenIndex Index of the token to start after.
     * @param int $channel    The channel to collect, or -1 for any non-default channel.
     *
     * @return array<Token>|null The collected tokens, or `null` if there are none.
     *
     * @throws \RuntimeException If the token index is outside the buffered tokens.
     */
    public function getHiddenTokensToRight(int $tokenIndex, int $channel) : ?array
    {
        $this->lazyInit();

        $count = \count($this->tokens);

        if ($tokenIndex < 0 || $tokenIndex >= $count) {
            throw new \RuntimeException(\sprintf('%d not in 0..%d', $tokenIndex, \count($this->tokens) - 1));
        }

        $first = $tokenIndex + 1;
        $nextOnChannel = $this->nextTokenOnChannel($first, Lexer::DEFAULT_TOKEN_CHANNEL);

        if ($nextOnChannel === -1) {
            // Nothing on channel to the right: scan through to the last token.
            $last = \count($this->tokens) - 1;
        } else {
            $last = $nextOnChannel;
        }

        return $this->filterForChannel($first, $last, $channel);
    }
391
392
    /**
     * Collect all tokens on the specified channel to the left of the given
     * token index, up until a token on DEFAULT_TOKEN_CHANNEL is seen. If
     * channel is -1, find any non default channel token.
     *
     * @param int $tokenIndex Index of the token to look in front of.
     * @param int $channel    The channel to collect, or -1 for any non-default channel.
     *
     * @return array<Token>|null The collected tokens, or `null` if there are none.
     *
     * @throws \RuntimeException If the token index is outside the buffered tokens.
     */
    public function getHiddenTokensToLeft(int $tokenIndex, int $channel) : ?array
    {
        $this->lazyInit();

        if ($tokenIndex < 0 || $tokenIndex >= \count($this->tokens)) {
            throw new \RuntimeException(\sprintf('%d not in 0..%d', $tokenIndex, \count($this->tokens) - 1));
        }

        $last = $tokenIndex - 1;
        $prevOnChannel = $this->previousTokenOnChannel($last, Lexer::DEFAULT_TOKEN_CHANNEL);

        // The neighbour on the left is itself the boundary, so nothing sits
        // in between.
        if ($prevOnChannel === $last) {
            return null;
        }

        // With no boundary on the left, $prevOnChannel is -1 and the scan
        // starts at index 0.
        return $this->filterForChannel($prevOnChannel + 1, $last, $channel);
    }
419
420
    /**
     * Pick the buffered tokens in `left..right` (inclusive) that belong to the
     * requested channel.
     *
     * @param int $left    Index of the first token to inspect.
     * @param int $right   Index of the last token to inspect.
     * @param int $channel The channel to keep; -1 keeps every token that is
     *                     not on {@see Lexer::DEFAULT_TOKEN_CHANNEL}.
     *
     * @return array<Token>|null The matching tokens, or `null` when none match.
     */
    protected function filterForChannel(int $left, int $right, int $channel) : ?array
    {
        $hidden = [];
        $i = $left;

        while ($i <= $right) {
            $token = $this->tokens[$i];
            $i++;

            if ($channel === -1) {
                $wanted = $token->getChannel() !== Lexer::DEFAULT_TOKEN_CHANNEL;
            } else {
                $wanted = $token->getChannel() === $channel;
            }

            if ($wanted) {
                $hidden[] = $token;
            }
        }

        return $hidden === [] ? null : $hidden;
    }
444
445
    /**
     * @return string The source name, as reported by the underlying token source.
     */
    public function getSourceName() : string
    {
        return $this->tokenSource->getSourceName();
    }
449
450
    /**
     * Get the concatenated text of the tokens inside the given interval.
     *
     * The whole input is buffered first. A `stop` beyond the buffer is clamped
     * to the last buffered index, and the text ends before the first EOF token.
     *
     * @param Interval $interval The inclusive range of token indexes to read.
     *
     * @return string The combined token text; empty if either bound is negative.
     */
    public function getTextByInterval(Interval $interval) : string
    {
        $this->lazyInit();
        $this->fill();

        $start = $interval->start;
        $stop = $interval->stop;

        if ($start < 0 || $stop < 0) {
            return '';
        }

        $lastIndex = \count($this->tokens) - 1;

        if ($stop > $lastIndex) {
            $stop = $lastIndex;
        }

        $text = '';
        $i = $start;

        while ($i <= $stop) {
            $token = $this->tokens[$i];

            if ($token->getType() === Token::EOF) {
                break;
            }

            $text .= $token->getText();
            $i++;
        }

        return $text;
    }
481
482
    /**
     * Get the text of all tokens in this stream.
     *
     * @return string The concatenated text of every token up to, but not
     *                including, EOF.
     */
    public function getText() : string
    {
        // The interval's upper bound comes from the buffer size, so the whole
        // input must be buffered first. On an unread stream the buffer is
        // empty, which would give the interval 0..-1 and an empty string.
        $this->fill();

        return $this->getTextByInterval(new Interval(0, \count($this->tokens) - 1));
    }
486
487
    /**
     * Get the text of the tokens between two tokens, both inclusive.
     *
     * @param Token|null $start The first token; `null` starts at index 0.
     * @param Token|null $stop  The last token; `null` extends to the last
     *                          token of the input.
     *
     * @return string The concatenated token text.
     */
    public function getTextByTokens(?Token $start = null, ?Token $stop = null) : string
    {
        $startIndex = $start === null ? 0 : $start->getTokenIndex();

        if ($stop === null) {
            // The default upper bound is the last buffered index, so the whole
            // input must be buffered before the buffer size is read.
            $this->fill();

            $stopIndex = \count($this->tokens) - 1;
        } else {
            $stopIndex = $stop->getTokenIndex();
        }

        return $this->getTextByInterval(new Interval($startIndex, $stopIndex));
    }
494
495
    /**
     * Get the text of the tokens covered by the given rule context.
     *
     * @param RuleContext $context The context whose source interval is read.
     *
     * @return string The concatenated token text for that interval.
     */
    public function getTextByContext(RuleContext $context) : string
    {
        $interval = $context->getSourceInterval();

        return $this->getTextByInterval($interval);
    }
499
500
    /**
     * Get all tokens from the token source until EOF.
     *
     * Tokens are requested in batches of 1000. A batch that comes back short
     * means {@see BufferedTokenStream::fetch()} stopped early, which ends the loop.
     */
    public function fill() : void
    {
        $this->lazyInit();

        do {
            $fetched = $this->fetch(1000);
        } while ($fetched === 1000);
    }
511
}
512