SetBased /
antlr-php-runtime
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace Antlr\Antlr4\Runtime\Atn; |
||
| 6 | |||
| 7 | use Antlr\Antlr4\Runtime\Atn\Actions\LexerAction; |
||
| 8 | use Antlr\Antlr4\Runtime\Atn\Actions\LexerIndexedCustomAction; |
||
| 9 | use Antlr\Antlr4\Runtime\CharStream; |
||
| 10 | use Antlr\Antlr4\Runtime\Comparison\Equality; |
||
| 11 | use Antlr\Antlr4\Runtime\Comparison\Equatable; |
||
| 12 | use Antlr\Antlr4\Runtime\Comparison\Hasher; |
||
| 13 | use Antlr\Antlr4\Runtime\Lexer; |
||
| 14 | |||
/**
 * Represents an executor for a sequence of lexer actions which are traversed
 * during the matching operation of a lexer rule (token).
 *
 * The executor tracks position information for position-dependent lexer actions
 * efficiently, ensuring that actions appearing only at the end of the rule do
 * not cause bloating of the {@see DFA} created for the lexer.
 *
 * @author Sam Harwell
 */
final class LexerActionExecutor implements Equatable
{
    /** @var array<LexerAction> */
    private $lexerActions;

    /**
     * Caches the result of {@see LexerActionExecutor::hashCode()} since
     * the hash code is an element of the performance-critical
     * {@see LexerATNConfig::hashCode()} operation.
     *
     * Stays `null` until the hash code is requested for the first time.
     *
     * @var int|null
     */
    private $cachedHashCode;

    /**
     * @param array<LexerAction> $lexerActions The lexer actions to execute,
     *                                         in execution order.
     */
    public function __construct(array $lexerActions)
    {
        $this->lexerActions = $lexerActions;
    }

    /**
     * Creates a {@see LexerActionExecutor} which executes the actions for
     * the input `lexerActionExecutor` followed by a specified `lexerAction`.
     *
     * @param LexerActionExecutor|null $lexerActionExecutor The executor for actions
     *                                                      already traversed by
     *                                                      the lexer while matching
     *                                                      a token within a particular
     *                                                      {@see LexerATNConfig}.
     *                                                      If this is `null`,
     *                                                      the method behaves as
     *                                                      though it were an
     *                                                      empty executor.
     * @param LexerAction              $lexerAction         The lexer action to
     *                                                      execute after the
     *                                                      actions specified in
     *                                                      `lexerActionExecutor`.
     *
     * @return self A {@see LexerActionExecutor} for executing the combined actions
     *              of `lexerActionExecutor` and `lexerAction`.
     */
    public static function append(
        ?LexerActionExecutor $lexerActionExecutor,
        LexerAction $lexerAction
    ) : self {
        // A missing executor contributes no prior actions.
        $previousActions = $lexerActionExecutor === null
            ? []
            : $lexerActionExecutor->lexerActions;

        return new self(\array_merge($previousActions, [$lexerAction]));
    }

    /**
     * Creates a {@see LexerActionExecutor} which encodes the current offset
     * for position-dependent lexer actions.
     *
     * Normally, when the executor encounters lexer actions where
     * {@see LexerAction::isPositionDependent()} returns `true`, it calls
     * {@see IntStream::seek()} on the input {@see CharStream} to set the input
     * position to the <em>end</em> of the current token. This behavior provides
     * for efficient DFA representation of lexer actions which appear at the end
     * of a lexer rule, even when the lexer rule matches a variable number of
     * characters.
     *
     * Prior to traversing a match transition in the ATN, the current offset
     * from the token start index is assigned to all position-dependent lexer
     * actions which have not already been assigned a fixed offset. By storing
     * the offsets relative to the token start index, the DFA representation of
     * lexer actions which appear in the middle of tokens remains efficient due
     * to sharing among tokens of the same length, regardless of their absolute
     * position in the input stream.
     *
     * If the current executor already has offsets assigned to all
     * position-dependent lexer actions, the method returns `$this`.
     *
     * @param int $offset The current offset to assign to all position-dependent
     *                    lexer actions which do not already have offsets assigned.
     *
     * @return self A {@see LexerActionExecutor} which stores input stream offsets
     *              for all position-dependent lexer actions.
     */
    public function fixOffsetBeforeMatch(int $offset) : self
    {
        $updatedLexerActions = null;

        // Iterate by key instead of by position so the replacement below always
        // targets the entry that was inspected, whatever keys the array carries.
        foreach ($this->lexerActions as $index => $lexerAction) {
            if (!$lexerAction->isPositionDependent()
                || $lexerAction instanceof LexerIndexedCustomAction) {
                continue;
            }

            if ($updatedLexerActions === null) {
                // Copy lazily, only once a replacement is actually needed.
                // Arrays are copied on assignment, so $this->lexerActions
                // is left untouched.
                $updatedLexerActions = $this->lexerActions;
            }

            $updatedLexerActions[$index] = new LexerIndexedCustomAction($offset, $lexerAction);
        }

        return $updatedLexerActions === null
            ? $this
            : new self($updatedLexerActions);
    }

    /**
     * Gets the lexer actions to be executed by this executor.
     *
     * @return array<LexerAction> The lexer actions to be executed by this executor.
     */
    public function getLexerActions() : array
    {
        return $this->lexerActions;
    }

    /**
     * Execute the actions encapsulated by this executor within the context of a
     * particular {@see Lexer}.
     *
     * This method calls {@see IntStream::seek()} to set the position of the
     * `input` {@see CharStream} prior to calling {@see LexerAction::execute()}
     * on a position-dependent action. Before the method returns, the input
     * position will be restored to the same position it was in when the method
     * was invoked.
     *
     * @param Lexer      $lexer      The lexer instance.
     * @param CharStream $input      The input stream which is the source for
     *                               the current token. When this method is called,
     *                               the current {@see IntStream::getIndex()} for
     *                               `input` should be the start of the following
     *                               token, i.e. 1 character past the end of the
     *                               current token.
     * @param int        $startIndex The token start index. This value may be
     *                               passed to {@see IntStream::seek()} to set
     *                               the `input` position to the beginning
     *                               of the token.
     */
    public function execute(Lexer $lexer, CharStream $input, int $startIndex) : void
    {
        // Tracks whether the stream was left somewhere other than $stopIndex.
        $requiresSeek = false;
        $stopIndex = $input->getIndex();

        try {
            foreach ($this->lexerActions as $lexerAction) {
                if ($lexerAction instanceof LexerIndexedCustomAction) {
                    // Indexed actions run at a fixed offset from the token start;
                    // the wrapped action is the one that is actually executed.
                    $offset = $lexerAction->getOffset();
                    $input->seek($startIndex + $offset);
                    $lexerAction = $lexerAction->getAction();
                    $requiresSeek = $startIndex + $offset !== $stopIndex;
                } elseif ($lexerAction->isPositionDependent()) {
                    // Position-dependent actions without an offset run at the
                    // position the stream had on entry.
                    $input->seek($stopIndex);
                    $requiresSeek = false;
                }

                $lexerAction->execute($lexer);
            }
        } finally {
            // Runs even when an action throws, so the entry position is restored.
            if ($requiresSeek) {
                $input->seek($stopIndex);
            }
        }
    }

    /**
     * Returns the hash code of this executor.
     *
     * The value is computed from the lexer actions on first use and cached
     * afterwards.
     *
     * @return int The hash code derived from the lexer actions.
     */
    public function hashCode() : int
    {
        if ($this->cachedHashCode === null) {
            $this->cachedHashCode = Hasher::hash($this->lexerActions);
        }

        return $this->cachedHashCode;
    }

    /**
     * Checks whether another object is an executor for equal lexer actions.
     *
     * @param object $other The object to compare with.
     *
     * @return bool `true` if `$other` is this instance, or another
     *              {@see LexerActionExecutor} with the same hash code and
     *              equal lexer actions; `false` otherwise.
     */
    public function equals(object $other) : bool
    {
        if ($this === $other) {
            return true;
        }

        // The hash codes are compared before the lexer actions themselves.
        return $other instanceof self
            && $this->hashCode() === $other->hashCode()
            && Equality::equals($this->lexerActions, $other->lexerActions);
    }

    /**
     * Returns a string of the form `LexerActionExecutor[a, b, ...]`, built from
     * the string conversion of each lexer action.
     */
    public function __toString() : string
    {
        return \sprintf('LexerActionExecutor[%s]', \implode(', ', $this->lexerActions));
    }
}
||
| 215 |