remorhaz /
php-unilex
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace Remorhaz\UniLex\Unicode; |
||
| 6 | |||
| 7 | use Remorhaz\UniLex\Exception; |
||
| 8 | use Remorhaz\UniLex\IO\CharBufferInterface; |
||
| 9 | use Remorhaz\UniLex\IO\TokenExtractInterface; |
||
| 10 | use Remorhaz\UniLex\Lexer\Token; |
||
| 11 | use Remorhaz\UniLex\Lexer\TokenFactoryInterface; |
||
| 12 | use Remorhaz\UniLex\Lexer\TokenMatcherInterface; |
||
| 13 | use Remorhaz\UniLex\Lexer\TokenPosition; |
||
| 14 | use Remorhaz\UniLex\Unicode\Grammar\TokenAttribute; |
||
| 15 | use Remorhaz\UniLex\Unicode\Grammar\TokenFactory; |
||
| 16 | use Remorhaz\UniLex\Unicode\Grammar\TokenType; |
||
| 17 | use Remorhaz\UniLex\Unicode\Grammar\Utf8TokenMatcher; |
||
| 18 | |||
/**
 * Character buffer that exposes symbols matched from an underlying source buffer.
 *
 * Each symbol is produced by running a token matcher (Utf8TokenMatcher unless another one is
 * injected) against the source buffer and taking the UNICODE_CHAR attribute of the matched
 * token. Offsets reported by this buffer count matched symbols; offsets of the source buffer
 * are copied into dedicated token attributes by finishToken().
 */
class CharBuffer implements CharBufferInterface, TokenExtractInterface
{
    /**
     * Buffer that symbols are matched from.
     *
     * @var CharBufferInterface
     */
    private $source;

    /**
     * Matcher used to read a single symbol from the source; created lazily by getMatcher().
     *
     * @var TokenMatcherInterface|null
     */
    private $matcher;

    /**
     * Symbol previewed by getSymbol() and not yet accepted by nextSymbol(); unset when there is none.
     *
     * @var int|null
     */
    private $char;

    /**
     * Factory passed to the matcher; created lazily by getTokenFactory().
     *
     * @var TokenFactoryInterface|null
     */
    private $tokenFactory;

    /**
     * Offset (in symbols) at which the current token starts.
     *
     * @var int
     */
    private $startOffset = 0;

    /**
     * Offset (in symbols) right after the last symbol accepted by nextSymbol().
     *
     * @var int
     */
    private $previewOffset = 0;

    /**
     * Symbols accepted for the current token so far.
     *
     * @var int[]
     */
    private $buffer = [];

    /**
     * Distance the source's finish offset moved while matching the previewed symbol, i.e. how
     * far the source must be rewound if that symbol is not accepted. Presumably counted in
     * bytes (the attributes filled from source positions are named UNICODE_BYTE_*) - confirm
     * against the source buffer implementation.
     *
     * @var int
     */
    private $sourcePreviewOffset = 0;

    /**
     * @param CharBufferInterface $source Buffer to match symbols from.
     */
    public function __construct(CharBufferInterface $source)
    {
        $this->source = $source;
    }

    /**
     * Replaces the matcher used to read symbols from the source buffer.
     *
     * @param TokenMatcherInterface $matcher
     */
    public function setMatcher(TokenMatcherInterface $matcher): void
    {
        $this->matcher = $matcher;
    }

    /**
     * Replaces the token factory handed to the matcher.
     *
     * @param TokenFactoryInterface $tokenFactory
     */
    public function setTokenFactory(TokenFactoryInterface $tokenFactory): void
    {
        $this->tokenFactory = $tokenFactory;
    }

    /**
     * Tells whether there are no more symbols to read.
     *
     * @return bool
     */
    public function isEnd(): bool
    {
        // A symbol previewed by getSymbol() has already been read from the source, so the
        // source may report its end while that symbol is still waiting for nextSymbol().
        if (isset($this->char)) {
            return false;
        }

        return $this->source->isEnd();
    }

    /**
     * Returns the symbol at the current preview position without accepting it.
     *
     * The symbol is matched from the source on first call and cached until it is accepted
     * by nextSymbol() or discarded by resetToken()/cleanupPreview().
     *
     * @return int
     * @throws Exception
     */
    public function getSymbol(): int
    {
        if (!isset($this->char)) {
            $this->char = $this->getMatchedChar();
        }

        return $this->char;
    }

    /**
     * Matches one symbol from the source buffer and remembers how far the source advanced.
     *
     * @return int Value of the matched token's UNICODE_CHAR attribute.
     * @throws Exception If the source is at its end, the matcher fails or the matched token
     *                   is not a SYMBOL token.
     */
    private function getMatchedChar(): int
    {
        if ($this->source->isEnd()) {
            throw new Exception("Unexpected end of source buffer on preview at index {$this->previewOffset}");
        }
        $positionBeforeMatch = $this->source->getTokenPosition();
        $result = $this
            ->getMatcher()
            ->match($this->source, $this->getTokenFactory());
        if (!$result) {
            throw new Exception("Failed to match Unicode char from source buffer");
        }
        $token = $this
            ->getMatcher()
            ->getToken();
        if ($token->getType() != TokenType::SYMBOL) {
            throw new Exception("Invalid Unicode char token");
        }
        $positionAfterMatch = $this->source->getTokenPosition();
        // Remember how far the source moved so that an unaccepted preview can be rewound later.
        $this->sourcePreviewOffset = $positionAfterMatch->getFinishOffset() - $positionBeforeMatch->getFinishOffset();

        return $token->getAttribute(TokenAttribute::UNICODE_CHAR);
    }

    /**
     * Accepts the current symbol into the token and moves the preview position forward.
     *
     * @throws Exception
     */
    public function nextSymbol(): void
    {
        $this->buffer[] = $this->char ?? $this->getMatchedChar();
        // The symbol is accepted now, so there is nothing left to rewind in the source.
        $this->sourcePreviewOffset = 0;
        unset($this->char);
        $this->previewOffset++;
    }

    /**
     * Not supported by this buffer.
     *
     * @param int $repeat
     * @throws Exception Always.
     */
    public function prevSymbol(int $repeat = 1): void
    {
        throw new Exception("Unread operation is not supported");
    }

    /**
     * Finishes the current token and starts the next one at the current preview position.
     *
     * The token receives the source position (UNICODE_BYTE_OFFSET / UNICODE_BYTE_LENGTH) and
     * the position in this buffer (UNICODE_CHAR_OFFSET / UNICODE_CHAR_LENGTH) as attributes.
     *
     * @param Token $token
     * @throws Exception
     */
    public function finishToken(Token $token): void
    {
        // Rewind a previewed but unaccepted symbol first, so it is not counted in the source position.
        $this->cleanupPreview();
        $sourcePosition = $this->source->getTokenPosition();
        $token->setAttribute(TokenAttribute::UNICODE_BYTE_OFFSET, $sourcePosition->getStartOffset());
        $token->setAttribute(TokenAttribute::UNICODE_BYTE_LENGTH, $sourcePosition->getLength());
        $this->source->finishToken($token);
        $charLength = $this->previewOffset - $this->startOffset;
        $token->setAttribute(TokenAttribute::UNICODE_CHAR_OFFSET, $this->startOffset);
        $token->setAttribute(TokenAttribute::UNICODE_CHAR_LENGTH, $charLength);
        $this->startOffset = $this->previewOffset;
        $this->buffer = [];
    }

    /**
     * Discards everything read for the current token, including a pending preview, in both
     * this buffer and the source buffer.
     */
    public function resetToken(): void
    {
        $this->previewOffset = $this->startOffset;
        $this->source->resetToken();
        $this->buffer = [];
        $this->sourcePreviewOffset = 0;
        unset($this->char);
    }

    /**
     * Returns the position of the current token, measured in symbols of this buffer.
     *
     * @return TokenPosition
     * @throws Exception
     */
    public function getTokenPosition(): TokenPosition
    {
        return new TokenPosition($this->startOffset, $this->previewOffset);
    }

    /**
     * Returns the current token as a string extracted by the source buffer.
     *
     * @return string
     * @throws Exception If the source buffer does not implement TokenExtractInterface.
     */
    public function getTokenAsString(): string
    {
        // Checking a local variable makes it evident that the call below is made on a
        // TokenExtractInterface instance and not on a bare CharBufferInterface.
        $source = $this->source;
        if (!$source instanceof TokenExtractInterface) {
            throw new Exception("Source buffer doesn't support extracting strings");
        }
        // A previewed but unaccepted symbol must not become part of the extracted string.
        $this->cleanupPreview();

        return $source->getTokenAsString();
    }

    /**
     * Rewinds the source buffer over a symbol that was previewed but never accepted
     * and drops the cached symbol.
     */
    private function cleanupPreview(): void
    {
        if ($this->sourcePreviewOffset == 0) {
            return;
        }
        $this->source->prevSymbol($this->sourcePreviewOffset);
        $this->sourcePreviewOffset = 0;
        unset($this->char);
    }

    /**
     * Returns the symbols accepted for the current token so far.
     *
     * @return array
     */
    public function getTokenAsArray(): array
    {
        return $this->buffer;
    }

    /**
     * Returns the configured matcher, creating a Utf8TokenMatcher if none was set.
     *
     * @return TokenMatcherInterface
     */
    private function getMatcher(): TokenMatcherInterface
    {
        if (!isset($this->matcher)) {
            $this->matcher = new Utf8TokenMatcher();
        }

        return $this->matcher;
    }

    /**
     * Returns the configured token factory, creating a default TokenFactory if none was set.
     *
     * @return TokenFactoryInterface
     */
    private function getTokenFactory(): TokenFactoryInterface
    {
        if (!isset($this->tokenFactory)) {
            $this->tokenFactory = new TokenFactory();
        }

        return $this->tokenFactory;
    }
}
||
| 204 |
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely caused by a typographical error or by the method having been renamed.