Passed
Push — master ( 852e13...6d4e0e )
by Edward
03:37
created

TokenMatcherGenerator::buildStateTransition()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 3
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Remorhaz\UniLex\Lexer;
6
7
use ReflectionException;
8
use Remorhaz\UniLex\AST\Translator;
9
use Remorhaz\UniLex\AST\Tree;
10
use Remorhaz\UniLex\Exception;
11
use Remorhaz\UniLex\RegExp\FSM\Dfa;
12
use Remorhaz\UniLex\RegExp\FSM\DfaBuilder;
13
use Remorhaz\UniLex\RegExp\FSM\LanguageBuilder;
14
use Remorhaz\UniLex\RegExp\FSM\Nfa;
15
use Remorhaz\UniLex\RegExp\FSM\NfaBuilder;
16
use Remorhaz\UniLex\RegExp\FSM\Range;
17
use Remorhaz\UniLex\RegExp\FSM\RangeSet;
18
use Remorhaz\UniLex\RegExp\ParserFactory;
19
use Remorhaz\UniLex\RegExp\PropertyLoader;
20
use Remorhaz\UniLex\Unicode\CharBufferFactory;
21
use Throwable;
22
23
use function array_diff;
24
use function array_intersect;
25
use function array_merge;
26
use function array_pop;
27
use function array_unique;
28
use function count;
29
use function implode;
30
use function in_array;
31
32
class TokenMatcherGenerator
33
{
34
35
    private $spec;
36
37
    private $output;
38
39
    private $dfa;
40
41
    private $regExpFinishMap = [];
42
43
    /**
     * Creates a generator for the given token matcher specification.
     *
     * @param TokenMatcherSpec $spec Specification that is read by all build steps of this generator.
     */
    public function __construct(TokenMatcherSpec $spec)
    {
        $this->spec = $spec;
    }
47
48
    /**
     * Assembles the source code of the generated matcher class (without the opening PHP tag).
     *
     * The result consists of the optional file comment, the strict types declaration, the header
     * (namespace, imports and custom header code) and the class with its single match() method.
     *
     * @return string
     * @throws Exception
     * @throws ReflectionException
     */
    private function buildOutput(): string
    {
        // The pieces are built in the same order in which they appear in the output.
        $fileComment = $this->buildFileComment();
        $header = $this->buildHeader();
        $className = $this->spec->getTargetShortName();
        $parentClassName = $this->spec->getTemplateClass()->getShortName();
        $matchParameters = $this->buildMatchParameters();
        $matchBody = $this->buildMatchBody();

        return
            "{$fileComment}\ndeclare(strict_types=1);\n\n" .
            "{$header}\n" .
            "class {$className} extends {$parentClassName}\n" .
            "{\n\n" .
            "    public function match({$matchParameters}): bool\n" .
            "    {\n" .
            $matchBody .
            "    }\n" .
            "}\n";
    }
66
67
    /**
     * Returns an instance of the target matcher class, generating the class on the fly if needed.
     *
     * If the target class cannot be found, its source code is generated and evaluated in the
     * current process before the class is instantiated.
     *
     * @return TokenMatcherInterface
     * @throws Exception
     */
    public function load(): TokenMatcherInterface
    {
        $targetClass = $this->spec->getTargetClassName();
        if (class_exists($targetClass)) {
            return new $targetClass();
        }

        try {
            // NOTE(review): eval() executes generated code; the specification must come from a trusted source.
            eval($this->getOutput(false));
        } catch (Throwable $e) {
            throw new Exception("Invalid PHP code generated", 0, $e);
        }

        if (!class_exists($targetClass)) {
            throw new Exception("Failed to generate target class");
        }

        return new $targetClass();
    }
88
89
    /**
     * Returns the generated source code, building it on the first call and reusing it afterwards.
     *
     * @param bool $asFile When true, the code is prefixed with the opening PHP tag so it can be saved as a file.
     * @return string
     * @throws Exception
     * @throws ReflectionException
     */
    public function getOutput(bool $asFile = true): string
    {
        if (null === $this->output) {
            $this->output = $this->buildOutput();
        }
        if (!$asFile) {
            return $this->output;
        }

        return "<?php\n\n{$this->output}";
    }
103
104
    /**
     * Wraps the file comment from the specification into a doc-block.
     *
     * Each line of the comment is prefixed with " * " and stripped of trailing whitespace.
     *
     * @return string Doc-block followed by a line feed, or an empty string if there is no comment.
     */
    private function buildFileComment(): string
    {
        $content = $this->spec->getFileComment();
        if ('' == $content) {
            return '';
        }

        $docBlockLines = ["/**"];
        foreach (explode("\n", $content) as $contentLine) {
            $docBlockLines[] = rtrim(" * {$contentLine}");
        }
        $docBlockLines[] = " */";

        return implode("\n", $docBlockLines) . "\n";
    }
119
120
    /**
     * Builds the header of the generated file: namespace declaration, imports and custom header code.
     *
     * Empty parts are skipped; the remaining parts are separated by line feeds.
     *
     * @return string
     * @throws ReflectionException
     */
    public function buildHeader(): string
    {
        $parts = [];

        $namespace = $this->spec->getTargetNamespaceName();
        if ('' != $namespace) {
            $parts[] = $this->buildMethodPart("namespace {$namespace};", 0);
        }

        $optionalParts = [
            $this->buildUseList(),
            $this->buildMethodPart($this->spec->getHeader(), 0),
        ];
        foreach ($optionalParts as $optionalPart) {
            if ('' != $optionalPart) {
                $parts[] = $optionalPart;
            }
        }

        return implode("\n", $parts);
    }
142
143
    /**
     * Builds the list of "use" statements for the classes imported by the generated file.
     *
     * Entries of the used class list with a string key are imported under that key as an alias;
     * all other entries are imported under their own name.
     *
     * @return string One "use" statement per line, or an empty string if nothing is imported.
     * @throws ReflectionException
     */
    private function buildUseList(): string
    {
        $result = '';
        foreach ($this->spec->getUsedClassList() as $alias => $className) {
            // An alias must be introduced with the "as" keyword, "use Foo\Bar Baz;" is a parse error.
            $classWithAlias = is_string($alias) ? "{$className} as {$alias}" : $className;
            $result .= $this->buildMethodPart("use {$classWithAlias};", 0);
        }

        return $result;
    }
157
158
    /**
     * Builds the parameter list of the generated match() method from the template's match method.
     *
     * Built-in types are emitted as is, class types are emitted by their short name. Type declarations
     * that are not a single named type are omitted, which is allowed by parameter type widening.
     *
     * @return string Comma-separated parameter declarations.
     * @throws ReflectionException
     */
    private function buildMatchParameters(): string
    {
        $paramList = [];
        foreach ($this->spec->getMatchMethod()->getParameters() as $matchParameter) {
            $param = '';
            $type = $matchParameter->getType();
            if ($type instanceof \ReflectionNamedType) {
                $typeName = $type->getName();
                if ($type->isBuiltin()) {
                    $param = $typeName;
                } else {
                    // ReflectionParameter::getClass() is deprecated since PHP 8.0, so the class is
                    // resolved from the type name; "self" and "parent" are relative to the declaring class.
                    $declaringClass = $matchParameter->getDeclaringClass();
                    $relativeName = strtolower($typeName);
                    if ('self' === $relativeName && null !== $declaringClass) {
                        $typeClass = $declaringClass;
                    } elseif (
                        'parent' === $relativeName &&
                        null !== $declaringClass &&
                        false !== $declaringClass->getParentClass()
                    ) {
                        $typeClass = $declaringClass->getParentClass();
                    } else {
                        $typeClass = new \ReflectionClass($typeName);
                    }
                    $param = $typeClass->getShortName();
                }
                $param .= " ";
            }
            $param .= "\${$matchParameter->getName()}";
            $paramList[] = $param;
        }

        return implode(", ", $paramList);
    }
180
181
    /**
     * Builds the body of the generated match() method.
     *
     * The body starts with the preamble, continues with the jumps to the entry states of all
     * non-default modes, then contains the state machines of all modes (the default mode is entered
     * without a mode check) and ends with the error state.
     *
     * @return string
     * @throws Exception
     */
    private function buildMatchBody(): string
    {
        $defaultMode = TokenMatcherInterface::DEFAULT_MODE;
        $body = $this->buildBeforeMatch();

        // Conditional jumps for every mode except the default one.
        foreach ($this->spec->getModeList() as $mode) {
            if ($defaultMode != $mode) {
                $body .=
                    $this->buildMethodPart("if (\$context->getMode() == '{$mode}') {") .
                    $this->buildFsmEntry($mode, 3) .
                    $this->buildMethodPart("}");
            }
        }

        // State machines; the unconditional jump to the default mode precedes its own states.
        foreach ($this->spec->getModeList() as $mode) {
            if ($defaultMode == $mode) {
                $body .= $this->buildFsmEntry($defaultMode) . "\n";
            }
            $body .= $this->buildFsmMoves($mode);
        }

        return $body . $this->buildErrorState();
    }
209
210
    /**
     * Builds the preamble of the match() method: context creation followed by the custom
     * "before match" code from the specification.
     *
     * @return string
     */
    private function buildBeforeMatch(): string
    {
        $contextCreation = $this->buildMethodPart("\$context = \$this->createContext(\$buffer, \$tokenFactory);");
        $customCode = $this->buildMethodPart($this->spec->getBeforeMatch());

        return $contextCreation . $customCode;
    }
216
217
    /**
     * Builds the jump to the start state of the given mode's state machine.
     *
     * @param string $mode
     * @param int    $indent Indentation level of the generated line.
     * @return string
     * @throws Exception
     */
    private function buildFsmEntry(string $mode, int $indent = 2): string
    {
        $startState = $this->getDfa($mode)->getStateMap()->getStartState();
        $startLabel = $this->buildStateLabel('state', $mode, $startState);

        return $this->buildMethodPart("goto {$startLabel};", $indent);
    }
229
230
    /**
     * Builds the name of a label for the given state.
     *
     * The label consists of the prefix, the capitalized mode name (omitted for the default mode)
     * and the state number.
     *
     * @param string $prefix
     * @param string $mode
     * @param int    $state
     * @return string
     */
    private function buildStateLabel(string $prefix, string $mode, int $state): string
    {
        if (TokenMatcherInterface::DEFAULT_MODE == $mode) {
            return "{$prefix}{$state}";
        }
        $modeSuffix = ucfirst($mode);

        return "{$prefix}{$modeSuffix}{$state}";
    }
238
239
    /**
     * Builds the code of all states of the given mode's state machine.
     *
     * Finish states with a single entering transition get no code of their own here,
     * their token code is built in place of the transition.
     *
     * @param string $mode
     * @return string
     * @throws Exception
     */
    private function buildFsmMoves(string $mode): string
    {
        $moves = '';
        foreach ($this->getDfa($mode)->getStateMap()->getStateList() as $stateIn) {
            if (!$this->isFinishStateWithSingleEnteringTransition($mode, $stateIn)) {
                $moves .=
                    $this->buildStateEntry($mode, $stateIn) .
                    $this->buildStateTransitionList($mode, $stateIn) .
                    $this->buildStateFinish($mode, $stateIn);
            }
        }

        return $moves;
    }
259
260
    /**
     * Builds the beginning of a state: its label and, if the state has exits, the end-of-buffer
     * check followed by reading of the current symbol.
     *
     * @param string $mode
     * @param int    $stateIn
     * @return string
     * @throws Exception
     */
    private function buildStateEntry(string $mode, int $stateIn): string
    {
        $entry = $this->buildMethodPart("{$this->buildStateLabel('state', $mode, $stateIn)}:");
        $dfa = $this->getDfa($mode);
        if (empty($dfa->getTransitionMap()->getExitList($stateIn))) {
            // A state without exits never reads a symbol.
            return $entry;
        }

        // At the end of the buffer a finish state produces its token, any other state fails.
        $onBufferEnd = $dfa->getStateMap()->isFinishState($stateIn)
            ? "goto {$this->buildStateLabel('finish', $mode, $stateIn)};"
            : "goto error;";

        return
            $entry .
            $this->buildMethodPart("if (\$context->getBuffer()->isEnd()) {") .
            $this->buildMethodPart($onBufferEnd, 3) .
            $this->buildMethodPart("}") .
            $this->buildMethodPart("\$char = \$context->getBuffer()->getSymbol();");
    }
284
285
    /**
     * Builds the conditional blocks for all transitions leaving the given state.
     *
     * Every block checks the symbol's ranges, runs the "on transition" code, moves the buffer to
     * the next symbol and then either builds the token in place (for finish states with a single
     * entering transition) or jumps to the target state.
     *
     * @param string $mode
     * @param int    $stateIn
     * @return string
     * @throws Exception
     */
    private function buildStateTransitionList(string $mode, int $stateIn): string
    {
        $dfa = $this->getDfa($mode);
        $transitions = '';
        foreach ($dfa->getTransitionMap()->getExitList($stateIn) as $stateOut => $symbolList) {
            foreach ($symbolList as $symbol) {
                $condition = $this->buildRangeSetCondition($dfa->getSymbolTable()->getRangeSet($symbol));
                $transitions .= $this->buildMethodPart("if ({$condition}) {");
                $transitions .= $this->buildOnTransition();
                $transitions .= $this->buildMethodPart("\$context->getBuffer()->nextSymbol();", 3);
                if ($this->isFinishStateWithSingleEnteringTransition($mode, $stateOut)) {
                    $transitions .= $this->buildToken($mode, $stateOut, 3);
                } else {
                    $transitions .= $this->buildStateTransition($mode, $stateOut, 3);
                }
                $transitions .= $this->buildMethodPart("}");
            }
        }

        return $transitions;
    }
310
311
    /**
     * Builds the jump to the label of the given target state.
     *
     * @param string $mode
     * @param int    $stateOut
     * @param int    $indent Indentation level of the generated line.
     * @return string
     */
    private function buildStateTransition(string $mode, int $stateOut, int $indent = 3): string
    {
        $targetLabel = $this->buildStateLabel('state', $mode, $stateOut);

        return $this->buildMethodPart("goto {$targetLabel};", $indent);
    }
321
322
    /**
     * Checks whether the state is a finish state without exits that is entered from exactly one
     * state by exactly one symbol.
     *
     * @param string $mode
     * @param int    $stateOut
     * @return bool
     * @throws Exception
     */
    private function isFinishStateWithSingleEnteringTransition(string $mode, int $stateOut): bool
    {
        $dfa = $this->getDfa($mode);
        if (!$dfa->getStateMap()->isFinishState($stateOut)) {
            return false;
        }

        $enters = $dfa->getTransitionMap()->getEnterList($stateOut);
        $exits = $dfa->getTransitionMap()->getExitList($stateOut);
        if (count($enters) != 1 || count($exits) != 0) {
            return false;
        }

        // The only remaining entry is the symbol list of the single entering state.
        $enteringSymbols = array_pop($enters);

        return count($enteringSymbols) == 1;
    }
342
343
    /**
     * Formats a character code as an uppercase hexadecimal literal with an even number of digits.
     *
     * @param int $char
     * @return string Literal like "0x0A" or "0x10FFFF".
     */
    private function buildHex(int $char): string
    {
        $digits = strtoupper(dechex($char));
        // Pad with a leading zero when the number of digits is odd.
        $padding = strlen($digits) % 2 != 0 ? '0' : '';

        return '0x' . $padding . $digits;
    }
352
353
    /**
     * Builds the list of conditions that check whether $char belongs to the given range.
     *
     * A range of one character produces one equality check, a range of two adjacent characters
     * produces two equality checks, any other range produces a single boundary check.
     *
     * @param Range $range
     * @return string[]
     */
    private function buildRangeCondition(Range $range): array
    {
        $start = $range->getStart();
        $finish = $range->getFinish();
        $startChar = $this->buildHex($start);
        if ($start == $finish) {
            return ["{$startChar} == \$char"];
        }

        $finishChar = $this->buildHex($finish);
        if ($start + 1 != $finish) {
            return ["{$startChar} <= \$char && \$char <= {$finishChar}"];
        }

        return [
            "{$startChar} == \$char",
            "{$finishChar} == \$char",
        ];
    }
369
370
    /**
     * Builds the condition that checks whether $char belongs to any range of the given set.
     *
     * The conditions of all ranges are joined with "||". A single condition, or a joined condition
     * that is short enough, stays on one line; otherwise each condition is put on a separate line.
     *
     * @param RangeSet $rangeSet
     * @return string
     */
    private function buildRangeSetCondition(RangeSet $rangeSet): string
    {
        $conditions = [];
        foreach ($rangeSet->getRanges() as $range) {
            foreach ($this->buildRangeCondition($range) as $condition) {
                $conditions[] = $condition;
            }
        }

        $singleLine = implode(" || ", $conditions);
        // 15 characters are reserved for the code around the condition, lines are limited to 120.
        $fitsLine = strlen($singleLine) + 15 <= 120;
        if ($fitsLine || count($conditions) == 1) {
            return ltrim($singleLine);
        }

        $multiLine = $this->buildMethodPart(implode(" ||\n", $conditions), 1);

        return "\n    " . ltrim($multiLine);
    }
384
385
    /**
     * Builds the end of a state: the jump to the error state for non-finish states, or the token
     * code for finish states (preceded by the "finish" label if the state has exits).
     *
     * @param string $mode
     * @param int    $stateIn
     * @return string
     * @throws Exception
     */
    private function buildStateFinish(string $mode, int $stateIn): string
    {
        $dfa = $this->getDfa($mode);
        if (!$dfa->getStateMap()->isFinishState($stateIn)) {
            return $this->buildMethodPart("goto error;\n");
        }

        // The label is only jumped to from the end-of-buffer check, which exists only for states with exits.
        $hasExits = !empty($dfa->getTransitionMap()->getExitList($stateIn));
        $finishLabel = $hasExits
            ? $this->buildMethodPart("{$this->buildStateLabel('finish', $mode, $stateIn)}:")
            : '';

        return $finishLabel . "{$this->buildToken($mode, $stateIn)}\n";
    }
404
405
    /**
     * Builds the code that produces the token matched in the given finish state.
     *
     * The code starts with a comment containing the regular expression of the token.
     *
     * @param string $mode
     * @param int    $stateIn
     * @param int    $indent Indentation level of the generated lines.
     * @return string
     * @throws Exception
     */
    private function buildToken(string $mode, int $stateIn, int $indent = 2): string
    {
        // NOTE(review): the finish map is looked up by state only, while buildDfa() replaces the whole map
        // on every call; with several modes the map of the most recently built DFA is used here.
        // Confirm whether the map should be keyed by mode.
        $regExp = $this->regExpFinishMap[$stateIn] ?? null;
        if (null === $regExp) {
            throw new Exception("No regular expressions found for state {$mode}:{$stateIn}");
        }
        $tokenSpec = $this->spec->getTokenSpec($mode, $regExp);
        $regExpComment = $this->buildMethodPart("// {$tokenSpec->getRegExp()}", $indent);

        return $regExpComment . $this->buildSingleToken($tokenSpec, $indent);
    }
423
424
    /**
     * Builds the code of a single token: the token's own code, the common "on token" code,
     * an empty line and the successful return.
     *
     * @param TokenSpec $tokenSpec
     * @param int       $indent Indentation level of the generated lines.
     * @return string
     */
    private function buildSingleToken(TokenSpec $tokenSpec, int $indent): string
    {
        $tokenCode = $this->buildMethodPart($tokenSpec->getCode(), $indent);
        $onTokenCode = $this->buildOnToken($indent);
        $returnCode = $this->buildMethodPart("return true;", $indent);

        return "{$tokenCode}{$onTokenCode}\n{$returnCode}";
    }
431
432
    /**
     * Builds the error state: the "error" label followed by the custom "on error" code from the
     * specification, or by "return false;" if there is no such code.
     *
     * @return string
     */
    private function buildErrorState(): string
    {
        $errorCode = $this->spec->getOnError();
        if ('' == $errorCode) {
            $errorCode = "return false;";
        }

        return $this->buildMethodPart("error:") . $this->buildMethodPart($errorCode);
    }
440
441
    /**
     * Indents every line of the given code and terminates it with a line feed.
     *
     * Trailing whitespace is removed from each line, so empty lines stay empty.
     *
     * @param string $code   Code to format; may contain several lines separated by line feeds.
     * @param int    $indent Indentation level, each level is four spaces.
     * @return string Formatted code, or an empty string if the code is empty.
     */
    private function buildMethodPart(string $code, int $indent = 2): string
    {
        if ('' === $code) {
            return '';
        }

        // Non-positive levels produce no indentation.
        $padding = str_repeat("    ", max(0, $indent));
        $formattedLines = [];
        foreach (explode("\n", $code) as $codeLine) {
            $formattedLines[] = rtrim($padding . $codeLine);
        }

        return implode("\n", $formattedLines) . "\n";
    }
458
459
    /**
     * Builds the custom "on transition" code from the specification, indented for the body of a
     * transition block.
     *
     * @return string
     */
    private function buildOnTransition(): string
    {
        $onTransitionCode = $this->spec->getOnTransition();

        return $this->buildMethodPart($onTransitionCode, 3);
    }
463
464
    /**
     * Builds the custom "on token" code from the specification.
     *
     * @param int $indent Indentation level of the generated lines.
     * @return string
     */
    private function buildOnToken(int $indent = 2): string
    {
        $onTokenCode = $this->spec->getOnToken();

        return $this->buildMethodPart($onTokenCode, $indent);
    }
468
469
    /**
     * Returns the DFA of the given mode, building it on the first request and reusing it afterwards.
     *
     * @param string $context Name of the mode.
     * @return Dfa
     * @throws Exception
     */
    private function getDfa(string $context): Dfa
    {
        if (isset($this->dfa[$context])) {
            return $this->dfa[$context];
        }
        $dfa = $this->buildDfa($context);
        $this->dfa[$context] = $dfa;

        return $dfa;
    }
482
483
    /**
     * Builds the DFA that recognizes all tokens of the given mode.
     *
     * Every regular expression of the mode is compiled into its own DFA. The states and transitions
     * of these DFAs are copied into a single NFA whose start state is connected to the start state
     * of every copy by an epsilon transition, and the NFA is converted into the resulting DFA.
     * Each finish state of the result is then mapped to the first regular expression (in the order
     * of the specification) that finishes in it; this map is stored for building the token code.
     *
     * @param string $mode
     * @return Dfa
     * @throws Exception If some regular expression of the mode is not mapped to any finish state.
     */
    private function buildDfa(string $mode): Dfa
    {
        /** @var Dfa[] $dfaList */
        $dfaList = [];
        foreach ($this->spec->getTokenSpecList($mode) as $tokenSpec) {
            $dfaList[$tokenSpec->getRegExp()] = $this->buildIndependentRegExp($tokenSpec->getRegExp());
        }

        $joinedNfa = new Nfa();
        $startState = $joinedNfa->getStateMap()->createState();
        $joinedNfa->getStateMap()->addStartState($startState);
        $languageBuilder = LanguageBuilder::forNfa($joinedNfa);
        // Maps states of the DFA that is currently being copied to the states of the joined NFA.
        $joinedNfaStates = [];
        // Maps every regular expression to the list of all its finish states in the joined NFA.
        $regExpFinishMap = [];
        foreach ($dfaList as $regExp => $dfa) {
            foreach ($dfa->getStateMap()->getStateList() as $dfaState) {
                $nfaState = $joinedNfa->getStateMap()->createState([$dfaState]);
                $joinedNfaStates[$dfaState] = $nfaState;
                if ($dfa->getStateMap()->isStartState($dfaState)) {
                    $joinedNfa->getEpsilonTransitionMap()->addTransition($startState, $nfaState, true);
                }
                if ($dfa->getStateMap()->isFinishState($dfaState)) {
                    // A DFA may have several finish states (e.g. for "a|ab"), all of them must be
                    // remembered or some finish states of the result would not map to any token.
                    $regExpFinishMap[$regExp][] = $nfaState;
                    $joinedNfa->getStateMap()->addFinishState($nfaState);
                }
            }
            foreach ($dfa->getTransitionMap()->getTransitionList() as $dfaStateIn => $transitions) {
                foreach ($transitions as $dfaStateOut => $symbols) {
                    $nfaStateIn = $joinedNfaStates[$dfaStateIn];
                    $nfaStateOut = $joinedNfaStates[$dfaStateOut];
                    foreach ($symbols as $symbol) {
                        // Symbols are local to each DFA, so they are re-created in the joined language.
                        $rangeSet = $dfa->getSymbolTable()->getRangeSet($symbol);
                        $newSymbols = $languageBuilder->getSymbolList(...$rangeSet->getRanges());
                        $oldSymbols = $joinedNfa
                            ->getSymbolTransitionMap()
                            ->transitionExists($nfaStateIn, $nfaStateOut)
                            ? $joinedNfa
                                ->getSymbolTransitionMap()
                                ->getTransition($nfaStateIn, $nfaStateOut)
                            : [];
                        $joinedNfa->getSymbolTransitionMap()->replaceTransition(
                            $nfaStateIn,
                            $nfaStateOut,
                            array_unique(array_merge($oldSymbols, $newSymbols))
                        );
                    }
                }
            }
        }

        $dfa = new Dfa();
        (new DfaBuilder($dfa, $joinedNfa))->run();

        $dfaRegExpFinishMap = [];
        foreach ($dfa->getStateMap()->getFinishStateList() as $dfaFinishState) {
            $nfaFinishStates = array_intersect(
                $dfa->getStateMap()->getStateValue($dfaFinishState),
                $joinedNfa->getStateMap()->getFinishStateList()
            );
            foreach ($regExpFinishMap as $regExp => $regExpFinishStates) {
                if (!empty(array_intersect($regExpFinishStates, $nfaFinishStates))) {
                    // Numeric regular expressions become integer array keys, hence the cast.
                    $dfaRegExpFinishMap[$dfaFinishState] = (string) $regExp;
                    break;
                }
            }
        }
        foreach ($this->spec->getTokenSpecList($mode) as $tokenSpec) {
            // Strict comparison: numeric strings like "0" and "00" must not be treated as equal.
            if (!in_array($tokenSpec->getRegExp(), $dfaRegExpFinishMap, true)) {
                throw new Exception("Token not reachable for regular expression: {$tokenSpec->getRegExp()} ");
            }
        }
        // NOTE(review): the map is replaced on every call while buildToken() looks it up by state only,
        // so with several modes the map of the most recently built DFA is used. Confirm whether it
        // should be keyed by mode.
        $this->regExpFinishMap = $dfaRegExpFinishMap;

        return $dfa;
    }
578
579
    /**
     * Parses the regular expression and adds its automaton to the given NFA, starting from the
     * given entry state.
     *
     * @param Nfa    $nfa
     * @param int    $entryState
     * @param string $regExp
     * @throws Exception
     */
    private function buildRegExp(Nfa $nfa, int $entryState, string $regExp): void
    {
        // Parse the regular expression into a syntax tree.
        $syntaxTree = new Tree();
        $regExpBuffer = CharBufferFactory::createFromString($regExp);
        ParserFactory::createFromBuffer($syntaxTree, $regExpBuffer)->run();

        // Translate the syntax tree into the states and transitions of the NFA.
        $builder = new NfaBuilder($nfa, PropertyLoader::create());
        $builder->setStartState($entryState);
        $translator = new Translator($syntaxTree, $builder);
        $translator->run();
    }
594
595
    /**
     * Builds a separate DFA for the given regular expression.
     *
     * The expression is parsed into a syntax tree, translated into a new NFA and the NFA is
     * converted into a DFA.
     *
     * @param string $regExp
     * @return Dfa
     * @throws Exception
     */
    private function buildIndependentRegExp(string $regExp): Dfa
    {
        $syntaxTree = new Tree();
        $regExpBuffer = CharBufferFactory::createFromString($regExp);
        ParserFactory::createFromBuffer($syntaxTree, $regExpBuffer)->run();

        $regExpNfa = new Nfa();
        $translator = new Translator($syntaxTree, new NfaBuilder($regExpNfa, PropertyLoader::create()));
        $translator->run();

        return DfaBuilder::fromNfa($regExpNfa);
    }
611
}
612