Issues (67)

src/Atn/LexerActionExecutor.php (1 issue)

Severity
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Antlr\Antlr4\Runtime\Atn;
6
7
use Antlr\Antlr4\Runtime\Atn\Actions\LexerAction;
8
use Antlr\Antlr4\Runtime\Atn\Actions\LexerIndexedCustomAction;
9
use Antlr\Antlr4\Runtime\CharStream;
10
use Antlr\Antlr4\Runtime\Comparison\Equality;
11
use Antlr\Antlr4\Runtime\Comparison\Equatable;
12
use Antlr\Antlr4\Runtime\Comparison\Hasher;
13
use Antlr\Antlr4\Runtime\Lexer;
14
15
/**
 * Represents an executor for a sequence of lexer actions which are traversed
 * during the matching operation of a lexer rule (token).
 *
 * The executor tracks position information for position-dependent lexer actions
 * efficiently, ensuring that actions appearing only at the end of the rule do
 * not cause bloating of the {@see DFA} created for the lexer.
 *
 * @author Sam Harwell
 */
final class LexerActionExecutor implements Equatable
{
    /** @var array<LexerAction> */
    private $lexerActions;

    /**
     * Caches the result of {@see LexerActionExecutor::hashCode()} since
     * the hash code is an element of the performance-critical
     * {@see LexerATNConfig::hashCode()} operation.
     *
     * @var int|null
     */
    private $cachedHashCode;

    /**
     * @param array<LexerAction> $lexerActions The lexer actions to execute,
     *                                         in execution order.
     */
    public function __construct(array $lexerActions)
    {
        $this->lexerActions = $lexerActions;
    }

    /**
     * Creates a {@see LexerActionExecutor} which executes the actions for
     * the input `lexerActionExecutor` followed by a specified `lexerAction`.
     *
     * @param LexerActionExecutor|null $lexerActionExecutor The executor for actions
     *                                                      already traversed by
     *                                                      the lexer while matching
     *                                                      a token within a particular
     *                                                      {@see LexerATNConfig}.
     *                                                      If this is `null`,
     *                                                      the method behaves as
     *                                                      though it were an
     *                                                      empty executor.
     * @param LexerAction              $lexerAction         The lexer action to
     *                                                      execute after the
     *                                                      actions specified in
     *                                                      `lexerActionExecutor`.
     *
     * @return self A {@see LexerActionExecutor} for executing the combined actions
     *              of `lexerActionExecutor` and `lexerAction`.
     */
    public static function append(
        ?LexerActionExecutor $lexerActionExecutor,
        LexerAction $lexerAction
    ) : self {
        if ($lexerActionExecutor === null) {
            return new LexerActionExecutor([$lexerAction]);
        }

        $lexerActions = \array_merge($lexerActionExecutor->lexerActions, [$lexerAction]);

        return new LexerActionExecutor($lexerActions);
    }

    /**
     * Creates a {@see LexerActionExecutor} which encodes the current offset
     * for position-dependent lexer actions.
     *
     * Normally, when the executor encounters lexer actions where
     * {@see LexerAction::isPositionDependent()} returns `true`, it calls
     * {@see IntStream::seek()} on the input {@see CharStream} to set the input
     * position to the <em>end</em> of the current token. This behavior provides
     * for efficient DFA representation of lexer actions which appear at the end
     * of a lexer rule, even when the lexer rule matches a variable number of
     * characters.
     *
     * Prior to traversing a match transition in the ATN, the current offset
     * from the token start index is assigned to all position-dependent lexer
     * actions which have not already been assigned a fixed offset. By storing
     * the offsets relative to the token start index, the DFA representation of
     * lexer actions which appear in the middle of tokens remains efficient due
     * to sharing among tokens of the same length, regardless of their absolute
     * position in the input stream.
     *
     * If the current executor already has offsets assigned to all
     * position-dependent lexer actions, the method returns `$this`.
     *
     * @param int $offset The current offset to assign to all position-dependent
     *                    lexer actions which do not already have offsets assigned.
     *
     * @return self A {@see LexerActionExecutor} which stores input stream offsets
     *              for all position-dependent lexer actions.
     */
    public function fixOffsetBeforeMatch(int $offset) : self
    {
        // Arrays are assigned by value, so this is an independent working copy;
        // the executor's own action list is never modified.
        $updatedLexerActions = $this->lexerActions;

        // An explicit flag records whether any action had to be wrapped, so the
        // "nothing changed" case can return the current instance untouched.
        $changed = false;

        // Iterate by key and write back at the very same key, so the
        // replacement always lands on the action it was derived from, whatever
        // keys the array passed to the constructor happened to carry.
        foreach ($this->lexerActions as $key => $lexerAction) {
            if (!$lexerAction->isPositionDependent()
                || $lexerAction instanceof LexerIndexedCustomAction) {
                // Either position-independent, or an offset is already fixed.
                continue;
            }

            $updatedLexerActions[$key] = new LexerIndexedCustomAction($offset, $lexerAction);
            $changed = true;
        }

        if (!$changed) {
            return $this;
        }

        return new LexerActionExecutor($updatedLexerActions);
    }

    /**
     * Gets the lexer actions to be executed by this executor.
     *
     * @return array<LexerAction> The lexer actions to be executed by this executor.
     */
    public function getLexerActions() : array
    {
        return $this->lexerActions;
    }

    /**
     * Execute the actions encapsulated by this executor within the context of a
     * particular {@see Lexer}.
     *
     * This method calls {@see IntStream::seek()} to set the position of the
     * `input` {@see CharStream} prior to calling {@see LexerAction::execute()}
     * on a position-dependent action. Before the method returns, the input
     * position will be restored to the same position it was in when the method
     * was invoked.
     *
     * @param Lexer      $lexer      The lexer instance.
     * @param CharStream $input      The input stream which is the source for
     *                               the current token. When this method is called,
     *                               the current {@see IntStream::getIndex()} for
     *                               `input` should be the start of the following
     *                               token, i.e. 1 character past the end of the
     *                               current token.
     * @param int        $startIndex The token start index. This value may be
     *                               passed to {@see IntStream::seek()} to set
     *                               the `input` position to the beginning
     *                               of the token.
     */
    public function execute(Lexer $lexer, CharStream $input, int $startIndex) : void
    {
        // True while the stream is parked somewhere other than $stopIndex.
        $requiresSeek = false;
        $stopIndex = $input->getIndex();

        try {
            foreach ($this->lexerActions as $lexerAction) {
                if ($lexerAction instanceof LexerIndexedCustomAction) {
                    // The action carries a fixed offset: move there, then run
                    // the wrapped action instead of the wrapper itself.
                    $offset = $lexerAction->getOffset();
                    $input->seek($startIndex + $offset);
                    $lexerAction = $lexerAction->getAction();
                    $requiresSeek = $startIndex + $offset !== $stopIndex;
                } elseif ($lexerAction->isPositionDependent()) {
                    // No fixed offset: the action runs at the original position.
                    $input->seek($stopIndex);
                    $requiresSeek = false;
                }

                $lexerAction->execute($lexer);
            }
        } finally {
            // Restore the original position even if an action threw.
            if ($requiresSeek) {
                $input->seek($stopIndex);
            }
        }
    }

    /**
     * Returns the hash code of the action sequence, computing it on first use
     * and serving the cached value afterwards.
     */
    public function hashCode() : int
    {
        if ($this->cachedHashCode === null) {
            $this->cachedHashCode = Hasher::hash($this->lexerActions);
        }

        return $this->cachedHashCode;
    }

    /**
     * Tells whether `$other` is an executor holding an equal action sequence.
     *
     * The hash codes are compared before the action lists themselves, so a
     * hash mismatch skips the element-wise comparison.
     */
    public function equals(object $other) : bool
    {
        if ($this === $other) {
            return true;
        }

        return $other instanceof self
            && $this->hashCode() === $other->hashCode()
            && Equality::equals($this->lexerActions, $other->lexerActions);
    }

    /**
     * Renders the executor as `LexerActionExecutor[...]`, listing the string
     * form of each action separated by a comma and a space.
     */
    public function __toString() : string
    {
        return \sprintf('LexerActionExecutor[%s]', \implode(', ', $this->lexerActions));
    }
}
215