DfaBuilder::fromNfa()   F
last analyzed

Complexity

Conditions 27
Paths > 20000

Size

Total Lines 141
Code Lines 91

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 27
eloc 91
nc 96228
nop 1
dl 0
loc 141
rs 0
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Remorhaz\UniLex\RegExp\FSM;
6
7
use Remorhaz\UniLex\AST\Tree;
8
use Remorhaz\UniLex\Exception as UniLexException;
9
use Remorhaz\UniLex\IO\CharBufferInterface;
10
11
use function array_merge;
12
use function array_unique;
13
use function sort;
14
15
class DfaBuilder
16
{
17
    private $dfa;
18
19
    private $nfa;
20
21
    private $nfaCalc;
22
23
    private $stateBuffer = [];
24
25
    public function __construct(Dfa $dfa, Nfa $nfa)
26
    {
27
        $this->dfa = $dfa;
28
        $this->nfa = $nfa;
29
    }
30
31
    /**
32
     * @param Nfa $nfa
33
     * @return Dfa
34
     * @throws UniLexException
35
     */
36
    public static function fromNfa(Nfa $nfa): Dfa
37
    {
38
        $dfa = new Dfa();
39
        (new self($dfa, $nfa))->run();
40
41
        $stateQuery = [];
42
        $nonEquivalentStates = [];
43
        $dfaStates = $dfa->getStateMap()->getStateList();
44
        $devilState = max($dfaStates) + 1;
45
        $dfaStates[] = $devilState;
46
47
        $transitionMap = [];
48
49
        foreach ($dfaStates as $stateA) {
50
            foreach ($dfaStates as $stateB) {
51
                $marked = $nonEquivalentStates[$stateA][$stateB] ?? false;
52
                if ($marked) {
53
                    continue;
54
                }
55
                $isFinishStateA = $stateA == $devilState
56
                    ? false
57
                    : $dfa->getStateMap()->isFinishState($stateA);
58
                $isFinishStateB = $stateB == $devilState
59
                    ? false
60
                    : $dfa->getStateMap()->isFinishState($stateB);
61
                if ($isFinishStateA == $isFinishStateB) {
62
                    continue;
63
                }
64
                $nonEquivalentStates[$stateA][$stateB] = true;
65
                $nonEquivalentStates[$stateB][$stateA] = true;
66
                $stateQuery[] = [$stateA, $stateB];
67
            }
68
            $transitionMap[$stateA][$devilState] = $dfa->getSymbolTable()->getSymbolList();
69
        }
70
71
        // Making all finish states non-equivalent to distinguish regular expressions
72
        /*foreach ($dfa->getStateMap()->getFinishStateList() as $stateA) {
73
            foreach ($dfa->getStateMap()->getFinishStateList() as $stateB) {
74
                $marked = $nonEquivalentStates[$stateA][$stateB] ?? false;
75
                if ($marked) {
76
                    continue;
77
                }
78
                $nonEquivalentStates[$stateA][$stateB] = true;
79
                $nonEquivalentStates[$stateB][$stateA] = true;
80
                $stateQuery[] = [$stateA, $stateB];
81
            }
82
        }*/
83
84
        foreach ($dfa->getTransitionMap()->getTransitionList() as $sourceState => $transitions) {
85
            $devilStateSymbols = $dfa->getSymbolTable()->getSymbolList();
86
            foreach ($transitions as $targetState => $symbols) {
87
                $devilStateSymbols = array_diff($devilStateSymbols, $symbols);
88
                $transitionMap[$sourceState][$targetState] = $symbols;
89
            }
90
            $transitionMap[$sourceState][$devilState] = $devilStateSymbols;
91
        }
92
93
        while (!empty($stateQuery)) {
94
            [$firstTargetState, $secondTargetState] = array_pop($stateQuery);
95
96
            foreach ($dfaStates as $stateA) {
97
                foreach ($dfaStates as $stateB) {
98
                    $isMarked = $nonEquivalentStates[$stateA][$stateB] ?? false;
99
                    if ($isMarked) {
100
                        continue;
101
                    }
102
                    $symbolsA = $transitionMap[$stateA][$firstTargetState] ?? [];
103
                    $symbolsB = $transitionMap[$stateB][$secondTargetState] ?? [];
104
                    $symbols = array_intersect($symbolsA, $symbolsB);
105
                    if (!empty($symbols)) {
106
                        $nonEquivalentStates[$stateA][$stateB] = true;
107
                        $nonEquivalentStates[$stateB][$stateA] = true;
108
                        $stateQuery[] = [$stateA, $stateB];
109
                    }
110
                }
111
            }
112
        }
113
114
        $equivalentStates = [];
115
        $nextClass = 1;
116
        $classes = [];
117
        foreach ($dfaStates as $stateA) {
118
            if (!isset($classes[$stateA])) {
119
                $class = $nextClass++;
120
                foreach ($dfaStates as $stateB) {
121
                    $isMarked = $nonEquivalentStates[$stateA][$stateB] ?? false;
122
                    if (!$isMarked) {
123
                        $classes[$stateB] = $class;
124
                        if ($stateB != $devilState) {
125
                            $equivalentStates[$class][$stateB] = true;
126
                        }
127
                    }
128
                }
129
            }
130
        }
131
132
        $minimizedDfa = new Dfa();
133
        $minimizedDfa->setSymbolTable($dfa->getSymbolTable());
134
        $nfaStartStates = $nfa->getStateMap()->getStartStateList();
135
        $nfaFinishStates = $nfa->getStateMap()->getFinishStateList();
136
        $newDfaStates = [];
137
        foreach ($equivalentStates as $newState => $dfaStates) {
138
            $newValue = [];
139
            foreach (array_keys($dfaStates) as $dfaState) {
140
                $nfaStates = $dfa->getStateMap()->getStateValue($dfaState);
141
                $newValue = array_merge($newValue, $nfaStates);
142
                $newDfaStates[$dfaState] = $newState;
143
            }
144
            $newValue = array_unique($newValue);
145
            sort($newValue);
146
            $minimizedDfa->getStateMap()->createState($newValue);
147
            $isStartState = !empty(array_intersect($nfaStartStates, $newValue));
148
            if ($isStartState) {
149
                $minimizedDfa->getStateMap()->addStartState($newState);
150
            }
151
            $isFinishState = !empty(array_intersect($nfaFinishStates, $newValue));
152
            if ($isFinishState) {
153
                $minimizedDfa->getStateMap()->addFinishState($newState);
154
            }
155
        }
156
        $transitionMap = [];
157
        foreach ($dfa->getTransitionMap()->getTransitionList() as $sourceState => $transitions) {
158
            foreach ($transitions as $targetState => $symbols) {
159
                $newSourceState = $newDfaStates[$sourceState];
160
                $newTargetState = $newDfaStates[$targetState];
161
                $transitionMap[$newSourceState][$newTargetState] = array_unique(
162
                    array_merge(
163
                        $transitionMap[$newSourceState][$newTargetState] ?? [],
164
                        $symbols
165
                    )
166
                );
167
            }
168
        }
169
        foreach ($transitionMap as $sourceState => $transitions) {
170
            foreach ($transitions as $targetState => $symbols) {
171
                sort($symbols);
172
                $minimizedDfa->getTransitionMap()->addTransition($sourceState, $targetState, $symbols);
173
            }
174
        }
175
176
        return $minimizedDfa;
177
    }
178
179
    /**
180
     * @param CharBufferInterface $buffer
181
     * @return Dfa
182
     * @throws UniLexException
183
     */
184
    public static function fromBuffer(CharBufferInterface $buffer): Dfa
185
    {
186
        $nfa = NfaBuilder::fromBuffer($buffer);
187
188
        return self::fromNfa($nfa);
189
    }
190
191
    /**
192
     * @param Tree $tree
193
     * @return Dfa
194
     * @throws UniLexException
195
     */
196
    public static function fromTree(Tree $tree): Dfa
197
    {
198
        $nfa = NfaBuilder::fromTree($tree);
199
200
        return self::fromNfa($nfa);
201
    }
202
203
    /**
204
     * @throws UniLexException
205
     */
206
    public function run(): void
207
    {
208
        $this->initStateBuffer();
209
        while ($state = $this->getNextState()) {
210
            [$dfaStateIn, $nfaStateInList] = $state;
211
            foreach ($this->getMovesBySymbol(...$nfaStateInList) as $symbolId => $nfaStateOutList) {
212
                $dfaStateOut = $this->createStateIfNotExists($isStateProcessed, ...$nfaStateOutList);
213
                $this->mergeTransition($dfaStateIn, $dfaStateOut, $symbolId);
214
                if (!$isStateProcessed) {
215
                    $this->addNextState($dfaStateOut, ...$nfaStateOutList);
216
                }
217
            }
218
        }
219
    }
220
221
    private function getMovesBySymbol(int ...$nfaStateList): array
222
    {
223
        $result = [];
224
        foreach ($this->dfa->getSymbolTable()->getSymbolList() as $symbolId) {
225
            $symbolMoves = $this->getNfaCalc()->getSymbolMoves($symbolId, ...$nfaStateList);
226
            $nextStateList = $this->getNfaCalc()->getEpsilonClosure(...$symbolMoves);
227
            if (!empty($nextStateList)) {
228
                $result[$symbolId] = $nextStateList;
229
            }
230
        }
231
232
        return $result;
233
    }
234
235
    /**
236
     * @param bool $exists
237
     * @param int  ...$nfaStateList
238
     * @return int
239
     * @throws UniLexException
240
     */
241
    private function createStateIfNotExists(&$exists, int ...$nfaStateList): int
242
    {
243
        $dfaStateMap = $this->dfa->getStateMap();
244
        $exists = $dfaStateMap->stateValueExists($nfaStateList);
245
        if ($exists) {
246
            return $dfaStateMap->getValueState($nfaStateList);
0 ignored issues
show
Bug Best Practice introduced by
The expression return $dfaStateMap->getValueState($nfaStateList) could return the type string which is incompatible with the type-hinted return integer. Consider adding an additional type-check to rule them out.
Loading history...
247
        }
248
        $dfaState = $dfaStateMap->createState($nfaStateList);
0 ignored issues
show
Bug introduced by
$nfaStateList of type array<integer,integer> is incompatible with the type boolean expected by parameter $value of Remorhaz\UniLex\RegExp\FSM\StateMap::createState(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

248
        $dfaState = $dfaStateMap->createState(/** @scrutinizer ignore-type */ $nfaStateList);
Loading history...
249
        foreach ($nfaStateList as $nfaState) {
250
            if ($this->nfa->getStateMap()->isFinishState($nfaState)) {
251
                $dfaStateMap->addFinishState($dfaState);
252
                break;
253
            }
254
        }
255
256
        return $dfaState;
257
    }
258
259
    /**
260
     * @param int $stateIn
261
     * @param int $stateOut
262
     * @param int $symbolId
263
     * @throws UniLexException
264
     */
265
    private function mergeTransition(int $stateIn, int $stateOut, int $symbolId): void
266
    {
267
        $transitionMap = $this
268
            ->dfa
269
            ->getTransitionMap();
270
        $symbolList = $transitionMap->transitionExists($stateIn, $stateOut)
271
            ? $transitionMap->getTransition($stateIn, $stateOut)
272
            : [];
273
        $symbolList[] = $symbolId;
274
        $transitionMap->replaceTransition($stateIn, $stateOut, $symbolList);
275
    }
276
277
    private function getNfaCalc(): NfaCalc
278
    {
279
        if (!isset($this->nfaCalc)) {
280
            $this->nfaCalc = new NfaCalc($this->nfa);
281
        }
282
283
        return $this->nfaCalc;
284
    }
285
286
    /**
287
     * @throws UniLexException
288
     */
289
    private function initStateBuffer(): void
290
    {
291
        $this->dfa->setSymbolTable($this->nfa->getSymbolTable());
292
        $nfaStateList = [];
293
        foreach ($this->nfa->getStateMap()->getStartStateList() as $nfaStartStateId) {
294
            $nfaStateList = array_unique(
295
                array_merge($nfaStateList, $this->getNfaCalc()->getEpsilonClosure($nfaStartStateId))
296
            );
297
        }
298
        $dfaStartStateId = $this->dfa->getStateMap()->createState($nfaStateList);
0 ignored issues
show
Bug introduced by
$nfaStateList of type array is incompatible with the type boolean expected by parameter $value of Remorhaz\UniLex\RegExp\FSM\StateMap::createState(). ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

298
        $dfaStartStateId = $this->dfa->getStateMap()->createState(/** @scrutinizer ignore-type */ $nfaStateList);
Loading history...
299
        $this->dfa->getStateMap()->addStartState($dfaStartStateId);
300
        $this->stateBuffer = [[$dfaStartStateId, $nfaStateList]];
301
    }
302
303
    private function getNextState(): ?array
304
    {
305
        return array_pop($this->stateBuffer);
306
    }
307
308
    private function addNextState(int $dfaState, int ...$nfaStateList): void
309
    {
310
        $this->stateBuffer[] = [$dfaState, $nfaStateList];
311
    }
312
}
313