Passed
Push — master ( b913d7...852e13 )
by Edward
04:18
created

DfaBuilder::run()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 10
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
cc 4
eloc 8
c 1
b 0
f 1
nc 4
nop 0
dl 0
loc 10
rs 10
1
<?php
2
3
namespace Remorhaz\UniLex\RegExp\FSM;
4
5
use Remorhaz\UniLex\AST\Tree;
6
use Remorhaz\UniLex\Exception as UniLexException;
7
use Remorhaz\UniLex\IO\CharBufferInterface;
8
9
use function array_merge;
10
use function array_unique;
11
use function sort;
12
13
class DfaBuilder
14
{
15
16
    private $dfa;
17
18
    private $nfa;
19
20
    private $nfaCalc;
21
22
    private $stateBuffer = [];
23
24
    public function __construct(Dfa $dfa, Nfa $nfa)
25
    {
26
        $this->dfa = $dfa;
27
        $this->nfa = $nfa;
28
    }
29
30
    /**
31
     * @param Nfa $nfa
32
     * @return Dfa
33
     * @throws UniLexException
34
     */
35
    public static function fromNfa(Nfa $nfa): Dfa
36
    {
37
        $dfa = new Dfa();
38
        (new self($dfa, $nfa))->run();
39
40
        $stateQuery = [];
41
        $nonEquivalentStates = [];
42
        $dfaStates = $dfa->getStateMap()->getStateList();
43
        $devilState = max($dfaStates) + 1;
44
        $dfaStates[] = $devilState;
45
46
        $transitionMap = [];
47
48
        foreach ($dfaStates as $stateA) {
49
            foreach ($dfaStates as $stateB) {
50
                $marked = $nonEquivalentStates[$stateA][$stateB] ?? false;
51
                if ($marked) {
52
                    continue;
53
                }
54
                $isFinishStateA = $stateA == $devilState
55
                    ? false
56
                    : $dfa->getStateMap()->isFinishState($stateA);
57
                $isFinishStateB = $stateB == $devilState
58
                    ? false
59
                    : $dfa->getStateMap()->isFinishState($stateB);
60
                if ($isFinishStateA == $isFinishStateB) {
61
                    continue;
62
                }
63
                $nonEquivalentStates[$stateA][$stateB] = true;
64
                $nonEquivalentStates[$stateB][$stateA] = true;
65
                $stateQuery[] = [$stateA, $stateB];
66
            }
67
            $transitionMap[$stateA][$devilState] = $dfa->getSymbolTable()->getSymbolList();
68
        }
69
70
        foreach ($dfa->getTransitionMap()->getTransitionList() as $sourceState => $transitions) {
71
            $devilStateSymbols = $dfa->getSymbolTable()->getSymbolList();
72
            foreach ($transitions as $targetState => $symbols) {
73
                $devilStateSymbols = array_diff($devilStateSymbols, $symbols);
74
                $transitionMap[$sourceState][$targetState] = $symbols;
75
            }
76
            $transitionMap[$sourceState][$devilState] = $devilStateSymbols;
77
        }
78
79
        while (!empty($stateQuery)) {
80
            [$firstTargetState, $secondTargetState] = array_pop($stateQuery);
81
82
            foreach ($dfaStates as $stateA) {
83
                foreach ($dfaStates as $stateB) {
84
                    $isMarked = $nonEquivalentStates[$stateA][$stateB] ?? false;
85
                    if ($isMarked) {
86
                        continue;
87
                    }
88
                    $symbolsA = $transitionMap[$stateA][$firstTargetState] ?? [];
89
                    $symbolsB = $transitionMap[$stateB][$secondTargetState] ?? [];
90
                    $symbols = array_intersect($symbolsA, $symbolsB);
91
                    if (!empty($symbols)) {
92
                        $nonEquivalentStates[$stateA][$stateB] = true;
93
                        $nonEquivalentStates[$stateB][$stateA] = true;
94
                        $stateQuery[] = [$stateA, $stateB];
95
                    }
96
                }
97
            }
98
        }
99
100
        $equivalentStates = [];
101
        $nextClass = 1;
102
        $classes = [];
103
        foreach ($dfaStates as $stateA) {
104
            if (!isset($classes[$stateA])) {
105
                $class = $nextClass++;
106
                foreach ($dfaStates as $stateB) {
107
                    $isMarked = $nonEquivalentStates[$stateA][$stateB] ?? false;
108
                    if (!$isMarked) {
109
                        $classes[$stateB] = $class;
110
                        if ($stateB != $devilState) {
111
                            $equivalentStates[$class][$stateB] = true;
112
                        }
113
                    }
114
                }
115
            }
116
        }
117
118
        $minimizedDfa = new Dfa();
119
        $minimizedDfa->setSymbolTable($dfa->getSymbolTable());
120
        $nfaStartStates = $nfa->getStateMap()->getStartStateList();
121
        $nfaFinishStates = $nfa->getStateMap()->getFinishStateList();
122
        $newDfaStates = [];
123
        foreach ($equivalentStates as $newState => $dfaStates) {
124
            $newValue = [];
125
            foreach (array_keys($dfaStates) as $dfaState) {
126
                $nfaStates = $dfa->getStateMap()->getStateValue($dfaState);
127
                $newValue = array_merge($newValue, $nfaStates);
128
                $newDfaStates[$dfaState] = $newState;
129
            }
130
            $newValue = array_unique($newValue);
131
            sort($newValue);
132
            $minimizedDfa->getStateMap()->createState($newValue);
133
            $isStartState = !empty(array_intersect($nfaStartStates, $newValue));
134
            if ($isStartState) {
135
                $minimizedDfa->getStateMap()->addStartState($newState);
136
            }
137
            $isFinishState = !empty(array_intersect($nfaFinishStates, $newValue));
138
            if ($isFinishState) {
139
                $minimizedDfa->getStateMap()->addFinishState($newState);
140
            }
141
        }
142
        $transitionMap = [];
143
        foreach ($dfa->getTransitionMap()->getTransitionList() as $sourceState => $transitions) {
144
            foreach ($transitions as $targetState => $symbols) {
145
                $newSourceState = $newDfaStates[$sourceState];
146
                $newTargetState = $newDfaStates[$targetState];
147
                $transitionMap[$newSourceState][$newTargetState] = array_unique(
148
                    array_merge(
149
                        $transitionMap[$newSourceState][$newTargetState] ?? [],
150
                        $symbols
151
                    )
152
                );
153
            }
154
        }
155
        foreach ($transitionMap as $sourceState => $transitions) {
156
            foreach ($transitions as $targetState => $symbols) {
157
                sort($symbols);
158
                $minimizedDfa->getTransitionMap()->addTransition($sourceState, $targetState, $symbols);
159
            }
160
        }
161
162
        return $minimizedDfa;
163
    }
164
165
    /**
166
     * @param CharBufferInterface $buffer
167
     * @return Dfa
168
     * @throws UniLexException
169
     */
170
    public static function fromBuffer(CharBufferInterface $buffer): Dfa
171
    {
172
        $nfa = NfaBuilder::fromBuffer($buffer);
173
174
        return self::fromNfa($nfa);
175
    }
176
177
    /**
178
     * @param Tree $tree
179
     * @return Dfa
180
     * @throws UniLexException
181
     */
182
    public static function fromTree(Tree $tree): Dfa
183
    {
184
        $nfa = NfaBuilder::fromTree($tree);
185
186
        return self::fromNfa($nfa);
187
    }
188
189
    /**
190
     * @throws UniLexException
191
     */
192
    public function run(): void
193
    {
194
        $this->initStateBuffer();
195
        while ($state = $this->getNextState()) {
196
            [$dfaStateIn, $nfaStateInList] = $state;
197
            foreach ($this->getMovesBySymbol(...$nfaStateInList) as $symbolId => $nfaStateOutList) {
198
                $dfaStateOut = $this->createStateIfNotExists($isStateProcessed, ...$nfaStateOutList);
199
                $this->mergeTransition($dfaStateIn, $dfaStateOut, $symbolId);
200
                if (!$isStateProcessed) {
201
                    $this->addNextState($dfaStateOut, ...$nfaStateOutList);
202
                }
203
            }
204
        }
205
    }
206
207
    private function getMovesBySymbol(int ...$nfaStateList): array
208
    {
209
        $result = [];
210
        foreach ($this->dfa->getSymbolTable()->getSymbolList() as $symbolId) {
211
            $symbolMoves = $this->getNfaCalc()->getSymbolMoves($symbolId, ...$nfaStateList);
212
            $nextStateList = $this->getNfaCalc()->getEpsilonClosure(...$symbolMoves);
213
            if (!empty($nextStateList)) {
214
                $result[$symbolId] = $nextStateList;
215
            }
216
        }
217
218
        return $result;
219
    }
220
221
    /**
222
     * @param bool $exists
223
     * @param int  ...$nfaStateList
224
     * @return int
225
     * @throws UniLexException
226
     */
227
    private function createStateIfNotExists(&$exists, int ...$nfaStateList): int
228
    {
229
        $dfaStateMap = $this->dfa->getStateMap();
230
        $exists = $dfaStateMap->stateValueExists($nfaStateList);
231
        if ($exists) {
232
            return $dfaStateMap->getValueState($nfaStateList);
0 ignored issues
show
Bug Best Practice introduced by
The expression return $dfaStateMap->getValueState($nfaStateList) could return the type string which is incompatible with the type-hinted return integer. Consider adding an additional type-check to rule them out.
Loading history...
233
        }
234
        $dfaState = $dfaStateMap->createState($nfaStateList);
0 ignored issues
show
Bug introduced by
$nfaStateList of type array<integer,integer> is incompatible with the type boolean expected by parameter $value of Remorhaz\UniLex\RegExp\FSM\StateMap::createState(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

234
        $dfaState = $dfaStateMap->createState(/** @scrutinizer ignore-type */ $nfaStateList);
Loading history...
235
        foreach ($nfaStateList as $nfaState) {
236
            if ($this->nfa->getStateMap()->isFinishState($nfaState)) {
237
                $dfaStateMap->addFinishState($dfaState);
238
                break;
239
            }
240
        }
241
242
        return $dfaState;
243
    }
244
245
    /**
246
     * @param int $stateIn
247
     * @param int $stateOut
248
     * @param int $symbolId
249
     * @throws UniLexException
250
     */
251
    private function mergeTransition(int $stateIn, int $stateOut, int $symbolId): void
252
    {
253
        $transitionMap = $this
254
            ->dfa
255
            ->getTransitionMap();
256
        $symbolList = $transitionMap->transitionExists($stateIn, $stateOut)
257
            ? $transitionMap->getTransition($stateIn, $stateOut)
258
            : [];
259
        $symbolList[] = $symbolId;
260
        $transitionMap->replaceTransition($stateIn, $stateOut, $symbolList);
261
    }
262
263
    private function getNfaCalc(): NfaCalc
264
    {
265
        if (!isset($this->nfaCalc)) {
266
            $this->nfaCalc = new NfaCalc($this->nfa);
267
        }
268
269
        return $this->nfaCalc;
270
    }
271
272
    /**
273
     * @throws UniLexException
274
     */
275
    private function initStateBuffer(): void
276
    {
277
        $this->dfa->setSymbolTable($this->nfa->getSymbolTable());
278
        $nfaStateList = [];
279
        foreach ($this->nfa->getStateMap()->getStartStateList() as $nfaStartStateId) {
280
            $nfaStateList = array_unique(
281
                array_merge($nfaStateList, $this->getNfaCalc()->getEpsilonClosure($nfaStartStateId))
282
            );
283
        }
284
        $dfaStartStateId = $this->dfa->getStateMap()->createState($nfaStateList);
0 ignored issues
show
Bug introduced by
$nfaStateList of type array is incompatible with the type boolean expected by parameter $value of Remorhaz\UniLex\RegExp\FSM\StateMap::createState(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

284
        $dfaStartStateId = $this->dfa->getStateMap()->createState(/** @scrutinizer ignore-type */ $nfaStateList);
Loading history...
285
        $this->dfa->getStateMap()->addStartState($dfaStartStateId);
286
        $this->stateBuffer = [[$dfaStartStateId, $nfaStateList]];
287
    }
288
289
    private function getNextState(): ?array
290
    {
291
        return array_pop($this->stateBuffer);
292
    }
293
294
    private function addNextState(int $dfaState, int ...$nfaStateList): void
295
    {
296
        $this->stateBuffer[] = [$dfaState, $nfaStateList];
297
    }
298
}
299