Issues (10)

src/Unicode/CharBuffer.php (1 issue)

Labels
Severity
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Remorhaz\UniLex\Unicode;
6
7
use Remorhaz\UniLex\Exception;
8
use Remorhaz\UniLex\IO\CharBufferInterface;
9
use Remorhaz\UniLex\IO\TokenExtractInterface;
10
use Remorhaz\UniLex\Lexer\Token;
11
use Remorhaz\UniLex\Lexer\TokenFactoryInterface;
12
use Remorhaz\UniLex\Lexer\TokenMatcherInterface;
13
use Remorhaz\UniLex\Lexer\TokenPosition;
14
use Remorhaz\UniLex\Unicode\Grammar\TokenAttribute;
15
use Remorhaz\UniLex\Unicode\Grammar\TokenFactory;
16
use Remorhaz\UniLex\Unicode\Grammar\TokenType;
17
use Remorhaz\UniLex\Unicode\Grammar\Utf8TokenMatcher;
18
19
class CharBuffer implements CharBufferInterface, TokenExtractInterface
20
{
21
    /**
     * Underlying buffer that characters are matched from.
     *
     * @var CharBufferInterface
     */
    private $source;

    /**
     * Matcher used to recognise a single character in the source.
     * Stays null until injected via setMatcher() or lazily created by getMatcher().
     *
     * @var TokenMatcherInterface|null
     */
    private $matcher;

    /**
     * Factory handed to the matcher; stays null until injected via setTokenFactory()
     * or lazily created by getTokenFactory().
     *
     * @var TokenFactoryInterface|null
     */
    private $tokenFactory;

    /**
     * Character returned by the last getSymbol() call that has not been consumed yet.
     * The property is unset (not merely null) whenever no character is previewed.
     *
     * @var int|null
     */
    private $char;

    /**
     * Characters consumed by nextSymbol() since the current token started.
     *
     * @var int[]
     */
    private $buffer = [];

    /**
     * Offset, counted in consumed characters, at which the current token starts.
     *
     * @var int
     */
    private $startOffset = 0;

    /**
     * Offset, counted in consumed characters, of the next character to be read.
     *
     * @var int
     */
    private $previewOffset = 0;

    /**
     * Distance the source's finish offset moved during the last match that has
     * not been committed by nextSymbol(); used by cleanupPreview() to rewind the source.
     *
     * @var int
     */
    private $sourcePreviewOffset = 0;
36
37
    /**
     * Stores the buffer that characters will later be matched from.
     *
     * @param CharBufferInterface $source
     */
    public function __construct(CharBufferInterface $source)
    {
        $this->source = $source;
    }
41
42
    /**
     * Replaces the matcher used to recognise characters in the source.
     * When never called, getMatcher() falls back to a Utf8TokenMatcher.
     *
     * @param TokenMatcherInterface $matcher
     */
    public function setMatcher(TokenMatcherInterface $matcher): void
    {
        $this->matcher = $matcher;
    }
46
47
    /**
     * Replaces the token factory passed to the matcher.
     * When never called, getTokenFactory() falls back to the grammar's TokenFactory.
     *
     * @param TokenFactoryInterface $tokenFactory
     */
    public function setTokenFactory(TokenFactoryInterface $tokenFactory): void
    {
        $this->tokenFactory = $tokenFactory;
    }
51
52
    /**
     * Reports whether the underlying source buffer is at its end.
     *
     * NOTE(review): the answer is delegated to the source as-is. A character previewed
     * by getSymbol() but not yet consumed appears to leave the source advanced, so this
     * may already report true in that state - confirm against callers.
     *
     * @return bool
     */
    public function isEnd(): bool
    {
        $sourceExhausted = $this->source->isEnd();

        return $sourceExhausted;
    }
56
57
    /**
     * Returns the current character without consuming it.
     *
     * The character is matched from the source on the first call and cached,
     * so repeated calls return the same value until the cache is dropped.
     *
     * @return int
     * @throws Exception
     */
    public function getSymbol(): int
    {
        if (isset($this->char)) {
            return $this->char;
        }
        $this->char = $this->getMatchedChar();

        return $this->char;
    }
69
70
    /**
     * Matches one character token from the source and returns its UNICODE_CHAR attribute.
     *
     * As a side effect, records in $sourcePreviewOffset how far the source's finish
     * offset moved during the match, so the match can be rewound later.
     *
     * @return int
     * @throws Exception When the source is at its end, the matcher fails, or the
     *                   matched token is not of type SYMBOL.
     */
    private function getMatchedChar(): int
    {
        $source = $this->source;
        if ($source->isEnd()) {
            throw new Exception("Unexpected end of source buffer on preview at index {$this->previewOffset}");
        }

        // Remember where the source stood so the consumed distance can be computed afterwards.
        $before = $source->getTokenPosition();

        $matcher = $this->getMatcher();
        $isMatched = $matcher->match($source, $this->getTokenFactory());
        if (!$isMatched) {
            throw new Exception("Failed to match Unicode char from source buffer");
        }

        $matchedToken = $this->getMatcher()->getToken();
        if ($matchedToken->getType() != TokenType::SYMBOL) {
            throw new Exception("Invalid Unicode char token");
        }

        $after = $source->getTokenPosition();
        $finishAfter = $after->getFinishOffset();
        $finishBefore = $before->getFinishOffset();
        $this->sourcePreviewOffset = $finishAfter - $finishBefore;

        return $matchedToken->getAttribute(TokenAttribute::UNICODE_CHAR);
    }
97
98
    /**
     * Consumes the current character: appends it to the token buffer, drops the
     * preview cache and advances the preview offset by one.
     *
     * If no character has been previewed yet, one is matched from the source first.
     *
     * @throws Exception
     */
    public function nextSymbol(): void
    {
        $symbol = isset($this->char) ? $this->char : $this->getMatchedChar();
        $this->buffer[] = $symbol;

        // The match is now committed, so there is nothing left to rewind in the source.
        $this->sourcePreviewOffset = 0;
        unset($this->char);

        ++$this->previewOffset;
    }
108
109
    /**
     * Stepping back is not implemented by this buffer; every call throws.
     *
     * @param int $repeat Ignored.
     * @throws Exception Always.
     */
    public function prevSymbol(int $repeat = 1): void
    {
        throw new Exception("Unread operation is not supported");
    }
117
118
    /**
     * Completes the current token and starts the next one.
     *
     * Any uncommitted preview is rewound first. The source's token position is then
     * copied into the token as byte offset/length, the token is finished on the source,
     * and the character offset/length tracked by this buffer are stored as well.
     * Finally the consumed-character buffer is cleared and the next token starts at
     * the current preview offset.
     *
     * @param Token $token
     * @throws Exception
     */
    public function finishToken(Token $token): void
    {
        $this->cleanupPreview();

        $position = $this->source->getTokenPosition();
        $token->setAttribute(TokenAttribute::UNICODE_BYTE_OFFSET, $position->getStartOffset());
        $token->setAttribute(TokenAttribute::UNICODE_BYTE_LENGTH, $position->getLength());
        $this->source->finishToken($token);

        $tokenStart = $this->startOffset;
        $tokenEnd = $this->previewOffset;
        $token->setAttribute(TokenAttribute::UNICODE_CHAR_OFFSET, $tokenStart);
        $token->setAttribute(TokenAttribute::UNICODE_CHAR_LENGTH, $tokenEnd - $tokenStart);

        $this->startOffset = $tokenEnd;
        $this->buffer = [];
    }
135
136
    /**
     * Abandons the token in progress: moves the preview offset back to the token
     * start, resets the token on the source and discards all preview state.
     */
    public function resetToken(): void
    {
        $this->previewOffset = $this->startOffset;
        $this->source->resetToken();

        // Nothing consumed or previewed survives a reset.
        unset($this->char);
        $this->sourcePreviewOffset = 0;
        $this->buffer = [];
    }
144
145
    /**
     * Builds the position of the token in progress from the token start offset
     * and the current preview offset, both counted in consumed characters.
     *
     * @return TokenPosition
     * @throws Exception
     */
    public function getTokenPosition(): TokenPosition
    {
        $start = $this->startOffset;
        $finish = $this->previewOffset;

        return new TokenPosition($start, $finish);
    }
153
154
    /**
     * Returns the current token as a string, as extracted by the source buffer.
     *
     * Any uncommitted preview is rewound in the source before extraction.
     *
     * @return string
     * @throws Exception When the source buffer does not implement TokenExtractInterface.
     */
    public function getTokenAsString(): string
    {
        // Narrowing the type on a local variable lets static analysers see that
        // getTokenAsString() exists on the source (it is not part of CharBufferInterface).
        $source = $this->source;
        if (!($source instanceof TokenExtractInterface)) {
            throw new Exception("Source buffer doesn't support extracting strings");
        }
        $this->cleanupPreview();

        return $source->getTokenAsString();
    }
167
168
    /**
     * Rewinds the source by the distance recorded for an uncommitted match and
     * drops the cached character. Does nothing when no such distance is recorded.
     */
    private function cleanupPreview(): void
    {
        $consumed = $this->sourcePreviewOffset;
        if ($consumed != 0) {
            $this->source->prevSymbol($consumed);
            $this->sourcePreviewOffset = 0;
            unset($this->char);
        }
    }
177
178
    /**
     * Returns the characters consumed by nextSymbol() since the current token started.
     *
     * @return array
     */
    public function getTokenAsArray(): array
    {
        $consumedChars = $this->buffer;

        return $consumedChars;
    }
185
186
    /**
     * Returns the configured matcher, creating a Utf8TokenMatcher on first use
     * when none was injected through setMatcher().
     *
     * @return TokenMatcherInterface
     */
    private function getMatcher(): TokenMatcherInterface
    {
        $this->matcher = $this->matcher ?? new Utf8TokenMatcher();

        return $this->matcher;
    }
194
195
    /**
     * Returns the configured token factory, creating the grammar's TokenFactory on
     * first use when none was injected through setTokenFactory().
     *
     * @return TokenFactoryInterface
     */
    private function getTokenFactory(): TokenFactoryInterface
    {
        $this->tokenFactory = $this->tokenFactory ?? new TokenFactory();

        return $this->tokenFactory;
    }
203
}
204