1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Antlr\Antlr4\Runtime\Atn; |
||
6 | |||
7 | use Antlr\Antlr4\Runtime\Atn\Actions\LexerAction; |
||
8 | use Antlr\Antlr4\Runtime\Atn\Actions\LexerIndexedCustomAction; |
||
9 | use Antlr\Antlr4\Runtime\CharStream; |
||
10 | use Antlr\Antlr4\Runtime\Comparison\Equality; |
||
11 | use Antlr\Antlr4\Runtime\Comparison\Equatable; |
||
12 | use Antlr\Antlr4\Runtime\Comparison\Hasher; |
||
13 | use Antlr\Antlr4\Runtime\Lexer; |
||
14 | |||
15 | /** |
||
16 | * Represents an executor for a sequence of lexer actions which are traversed during
||
17 | * the matching operation of a lexer rule (token). |
||
18 | * |
||
19 | * The executor tracks position information for position-dependent lexer actions |
||
20 | * efficiently, ensuring that actions appearing only at the end of the rule do |
||
21 | * not cause bloating of the {@see DFA} created for the lexer. |
||
22 | * |
||
23 | * @author Sam Harwell |
||
24 | */ |
||
final class LexerActionExecutor implements Equatable
{
    /**
     * The actions run by this executor, in execution order.
     *
     * @var array<LexerAction>
     */
    private $lexerActions;

    /**
     * Caches the result of {@see LexerActionExecutor::hashCode()} since
     * the hash code is an element of the performance-critical
     * {@see LexerATNConfig::hashCode()} operation.
     *
     * @var int|null
     */
    private $cachedHashCode;

    /**
     * Creates an executor for the given sequence of lexer actions.
     *
     * @param array<LexerAction> $lexerActions The actions to execute, in order.
     */
    public function __construct(array $lexerActions)
    {
        $this->lexerActions = $lexerActions;
    }

    /**
     * Creates a {@see LexerActionExecutor} which executes the actions for
     * the input `lexerActionExecutor` followed by a specified `lexerAction`.
     *
     * @param LexerActionExecutor|null $lexerActionExecutor The executor for actions
     *                                                      already traversed by
     *                                                      the lexer while matching
     *                                                      a token within a particular
     *                                                      {@see LexerATNConfig}.
     *                                                      If this is `null`,
     *                                                      the method behaves as
     *                                                      though it were an
     *                                                      empty executor.
     * @param LexerAction              $lexerAction         The lexer action to
     *                                                      execute after the
     *                                                      actions specified in
     *                                                      `lexerActionExecutor`.
     *
     * @return self A {@see LexerActionExecutor} for executing the combined actions
     *              of `lexerActionExecutor` and `lexerAction`.
     */
    public static function append(
        ?LexerActionExecutor $lexerActionExecutor,
        LexerAction $lexerAction
    ) : self {
        if ($lexerActionExecutor === null) {
            return new self([$lexerAction]);
        }

        // A new executor is always returned; the input executor is not modified.
        return new self(\array_merge($lexerActionExecutor->lexerActions, [$lexerAction]));
    }

    /**
     * Creates a {@see LexerActionExecutor} which encodes the current offset
     * for position-dependent lexer actions.
     *
     * Normally, when the executor encounters lexer actions where
     * {@see LexerAction::isPositionDependent()} returns `true`, it calls
     * {@see IntStream::seek()} on the input {@see CharStream} to set the input
     * position to the <em>end</em> of the current token. This behavior provides
     * for efficient DFA representation of lexer actions which appear at the end
     * of a lexer rule, even when the lexer rule matches a variable number of
     * characters.
     *
     * Prior to traversing a match transition in the ATN, the current offset
     * from the token start index is assigned to all position-dependent lexer
     * actions which have not already been assigned a fixed offset. By storing
     * the offsets relative to the token start index, the DFA representation of
     * lexer actions which appear in the middle of tokens remains efficient due
     * to sharing among tokens of the same length, regardless of their absolute
     * position in the input stream.
     *
     * If the current executor already has offsets assigned to all
     * position-dependent lexer actions, the method returns `$this`.
     *
     * @param int $offset The current offset to assign to all position-dependent
     *                    lexer actions which do not already have offsets assigned.
     *
     * @return self A {@see LexerActionExecutor} which stores input stream offsets
     *              for all position-dependent lexer actions.
     */
    public function fixOffsetBeforeMatch(int $offset) : self
    {
        // Arrays are copied on assignment, so writing to this local never
        // touches $this->lexerActions.
        $updatedLexerActions = $this->lexerActions;
        $changed = false;

        // Iterate by key rather than by numeric index so the method does not
        // depend on the array being a gap-free, zero-based list.
        foreach ($this->lexerActions as $key => $lexerAction) {
            if ($lexerAction instanceof LexerIndexedCustomAction) {
                // Already wrapped with an offset; leave as is.
                continue;
            }

            if (!$lexerAction->isPositionDependent()) {
                continue;
            }

            $updatedLexerActions[$key] = new LexerIndexedCustomAction($offset, $lexerAction);
            $changed = true;
        }

        // Reuse the current instance when nothing had to be wrapped.
        return $changed ? new self($updatedLexerActions) : $this;
    }

    /**
     * Gets the lexer actions to be executed by this executor.
     *
     * @return array<LexerAction> The lexer actions to be executed by this executor.
     */
    public function getLexerActions() : array
    {
        return $this->lexerActions;
    }

    /**
     * Execute the actions encapsulated by this executor within the context of a
     * particular {@see Lexer}.
     *
     * This method calls {@see IntStream::seek()} to set the position of the
     * `input` {@see CharStream} prior to calling {@see LexerAction::execute()}
     * on a position-dependent action. Before the method returns, the input
     * position will be restored to the same position it was in when the method
     * was invoked.
     *
     * @param Lexer      $lexer      The lexer instance.
     * @param CharStream $input      The input stream which is the source for
     *                               the current token. When this method is called,
     *                               the current {@see IntStream::getIndex()} for
     *                               `input` should be the start of the following
     *                               token, i.e. 1 character past the end of the
     *                               current token.
     * @param int        $startIndex The token start index. This value may be
     *                               passed to {@see IntStream::seek()} to set
     *                               the `input` position to the beginning
     *                               of the token.
     */
    public function execute(Lexer $lexer, CharStream $input, int $startIndex) : void
    {
        // Tracks whether the last seek left the stream somewhere other than
        // the position it had on entry.
        $requiresSeek = false;
        $stopIndex = $input->getIndex();

        try {
            foreach ($this->lexerActions as $lexerAction) {
                if ($lexerAction instanceof LexerIndexedCustomAction) {
                    // The wrapper stores an offset relative to the token start:
                    // move the stream there and run the wrapped action.
                    $position = $startIndex + $lexerAction->getOffset();
                    $input->seek($position);
                    $lexerAction = $lexerAction->getAction();
                    $requiresSeek = ($position !== $stopIndex);
                } elseif ($lexerAction->isPositionDependent()) {
                    // Position-dependent action without a fixed offset: run it
                    // at the position the stream had on entry.
                    $input->seek($stopIndex);
                    $requiresSeek = false;
                }

                $lexerAction->execute($lexer);
            }
        } finally {
            // Restore the entry position even if an action throws.
            if ($requiresSeek) {
                $input->seek($stopIndex);
            }
        }
    }

    /**
     * Returns the hash code of this executor.
     *
     * The value is computed from the lexer actions via {@see Hasher::hash()} on
     * first use and then served from {@see LexerActionExecutor::$cachedHashCode}.
     *
     * @return int The hash code.
     */
    public function hashCode() : int
    {
        if ($this->cachedHashCode === null) {
            $this->cachedHashCode = Hasher::hash($this->lexerActions);
        }

        return $this->cachedHashCode;
    }

    /**
     * Checks whether another object is an executor with equal lexer actions.
     *
     * @param object $other The object to compare with.
     *
     * @return bool `true` when `$other` is this same instance, or is a
     *              {@see LexerActionExecutor} with the same hash code whose
     *              actions compare equal through {@see Equality::equals()}.
     */
    public function equals(object $other) : bool
    {
        if ($this === $other) {
            return true;
        }

        if (!$other instanceof self) {
            return false;
        }

        // The (cached) hash codes are compared before the action arrays.
        return $this->hashCode() === $other->hashCode()
            && Equality::equals($this->lexerActions, $other->lexerActions);
    }

    /**
     * Returns a string of the form `LexerActionExecutor[a, b, ...]`.
     *
     * NOTE(review): relies on every action being convertible to a string by
     * `implode()` - confirm all {@see LexerAction} implementations provide that.
     */
    public function __toString() : string
    {
        return \sprintf('LexerActionExecutor[%s]', \implode(', ', $this->lexerActions));
    }
}
||
215 |