Passed
Push — master ( 5848ce...73ad9b )
by Edward
03:25
created

TokenMatcherGenerator::buildConditionFunctions()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 13
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 8
c 1
b 0
f 0
nc 2
nop 0
dl 0
loc 13
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Remorhaz\UniLex\Lexer;
6
7
use ReflectionException;
8
use Remorhaz\UniLex\AST\Translator;
9
use Remorhaz\UniLex\AST\Tree;
10
use Remorhaz\UniLex\Exception;
11
use Remorhaz\UniLex\RegExp\FSM\Dfa;
12
use Remorhaz\UniLex\RegExp\FSM\DfaBuilder;
13
use Remorhaz\UniLex\RegExp\FSM\LanguageBuilder;
14
use Remorhaz\UniLex\RegExp\FSM\Nfa;
15
use Remorhaz\UniLex\RegExp\FSM\NfaBuilder;
16
use Remorhaz\UniLex\RegExp\FSM\Range;
17
use Remorhaz\UniLex\RegExp\ParserFactory;
18
use Remorhaz\UniLex\RegExp\PropertyLoader;
19
use Remorhaz\UniLex\Unicode\CharBufferFactory;
20
use Throwable;
21
22
use function array_diff;
23
use function array_intersect;
24
use function array_merge;
25
use function array_pop;
26
use function array_unique;
27
use function count;
28
use function implode;
29
use function in_array;
30
31
class TokenMatcherGenerator
32
{
33
34
    /**
     * Specification the matcher source is generated from (set by the constructor).
     *
     * @var TokenMatcherSpec
     */
    private $spec;
35
36
    /**
     * Cached result of buildOutput(); stays unset until getOutput() is called for the first time.
     *
     * @var string|null
     */
    private $output;
37
38
    /**
     * DFA instances already built by getDfa(), indexed by mode name.
     *
     * @var Dfa[]|null
     */
    private $dfa;
39
40
    /**
     * For each mode: map of DFA finish state to the regular expression recognised in that state.
     * Filled by buildDfa() and read by buildToken().
     *
     * @var string[][]
     */
    private $regExpFinishMap = [];
41
42
    /**
     * Generated condition helper methods: method name => formatted condition source.
     * Filled by buildRangeSetCondition() and rendered by buildConditionFunctions().
     *
     * @var string[]
     */
    private $conditionFunctions = [];
43
44
    /**
     * Stores the specification that all later generation steps read from.
     *
     * @param TokenMatcherSpec $spec
     */
    public function __construct(TokenMatcherSpec $spec)
    {
        $this->spec = $spec;
    }
48
49
    /**
     * Assembles the full source of the generated matcher class (without the opening PHP tag).
     *
     * @return string
     * @throws Exception
     * @throws ReflectionException
     */
    private function buildOutput(): string
    {
        // Start from a clean slate; helpers are registered again while the match body is built.
        $this->conditionFunctions = [];

        $fileComment = $this->buildFileComment();
        $header = $this->buildHeader();
        $targetName = $this->spec->getTargetShortName();
        $templateName = $this->spec->getTemplateClass()->getShortName();
        $matchParameters = $this->buildMatchParameters();
        // Order matters: building the body fills $this->conditionFunctions, which is rendered right after.
        $matchBody = $this->buildMatchBody();
        $conditionFunctions = $this->buildConditionFunctions();

        return
            "{$fileComment}\ndeclare(strict_types=1);\n\n{$header}\n" .
            "class {$targetName} extends {$templateName}\n{\n\n" .
            "    public function match({$matchParameters}): bool\n    {\n{$matchBody}    }\n" .
            "{$conditionFunctions}}\n";
    }
70
71
    /**
     * Returns an instance of the target matcher class, generating and evaluating its source first
     * when the class is not available yet.
     *
     * @return TokenMatcherInterface
     * @throws Exception When generation/evaluation fails or the class is still missing afterwards.
     */
    public function load(): TokenMatcherInterface
    {
        $targetClass = $this->spec->getTargetClassName();
        if (class_exists($targetClass)) {
            return new $targetClass();
        }

        try {
            // NOTE(review): static analysis flags eval() as discouraged; the evaluated code is the
            // generator's own output (without the opening PHP tag).
            eval($this->getOutput(false));
        } catch (Throwable $error) {
            throw new Exception("Invalid PHP code generated", 0, $error);
        }
        if (!class_exists($targetClass)) {
            throw new Exception("Failed to generate target class");
        }

        return new $targetClass();
    }
92
93
    /**
     * Returns the generated source, building it on the first call and reusing the cached text afterwards.
     *
     * @param bool $asFile When true, the source is prefixed with the opening PHP tag.
     * @return string
     * @throws Exception
     * @throws ReflectionException
     */
    public function getOutput(bool $asFile = true): string
    {
        $this->output = $this->output ?? $this->buildOutput();
        if (!$asFile) {
            return $this->output;
        }

        return "<?php\n\n{$this->output}";
    }
107
108
    /**
     * Wraps the file comment from the specification into a docblock; returns an empty string
     * when the specification provides no comment.
     *
     * @return string
     */
    private function buildFileComment(): string
    {
        $content = $this->spec->getFileComment();
        if ('' == $content) {
            return '';
        }

        $lines = ["/**"];
        foreach (explode("\n", $content) as $text) {
            // rtrim() avoids trailing whitespace on empty comment lines.
            $lines[] = rtrim(" * {$text}");
        }
        $lines[] = " */";

        return implode("\n", $lines) . "\n";
    }
123
124
    /**
     * Builds the part of the generated file that precedes the class declaration: namespace
     * statement, list of imports and custom header code. Empty parts are omitted.
     *
     * @return string
     * @throws ReflectionException
     */
    public function buildHeader(): string
    {
        $parts = [];

        $namespace = $this->spec->getTargetNamespaceName();
        if ('' != $namespace) {
            $parts[] = $this->buildMethodPart("namespace {$namespace};", 0);
        }

        $optionalParts = [
            $this->buildUseList(),
            $this->buildMethodPart($this->spec->getHeader(), 0),
        ];
        foreach ($optionalParts as $part) {
            if ('' != $part) {
                $parts[] = $part;
            }
        }

        return implode("\n", $parts);
    }
146
147
    /**
     * Builds the list of "use" statements for the generated file.
     *
     * Entries of the used class list that have a string key are treated as aliased imports
     * and rendered as "use ClassName as Alias;"; all other entries are rendered as "use ClassName;".
     *
     * @return string One statement per line, or an empty string when nothing is imported.
     * @throws ReflectionException
     */
    private function buildUseList(): string
    {
        $result = '';
        foreach ($this->spec->getUsedClassList() as $alias => $className) {
            // The "as" keyword is mandatory in an aliased import; without it the generated file does not parse.
            $classWithAlias = is_string($alias) ? "{$className} as {$alias}" : $className;
            $result .= $this->buildMethodPart("use {$classWithAlias};", 0);
        }

        return $result;
    }
161
162
    /**
     * Renders the parameter list of the generated match() method from the parameters of the
     * match method provided by the specification.
     *
     * Built-in types are rendered by name, class types by the short class name.
     *
     * @return string Comma-separated parameter declarations.
     * @throws ReflectionException
     */
    private function buildMatchParameters(): string
    {
        $paramList = [];
        foreach ($this->spec->getMatchMethod()->getParameters() as $matchParameter) {
            $typePrefix = '';
            if ($matchParameter->hasType()) {
                $type = $matchParameter->getType();
                // NOTE(review): ReflectionParameter::getClass() is deprecated since PHP 8.0 - consider replacing.
                $typeName = $type->isBuiltin()
                    ? $type->getName()
                    : $matchParameter->getClass()->getShortName();
                $typePrefix = "{$typeName} ";
            }
            $paramList[] = "{$typePrefix}\${$matchParameter->getName()}";
        }

        return implode(", ", $paramList);
    }
184
185
    /**
     * Builds the body of the generated match() method: preamble, mode dispatch, the state
     * machine code of every mode and the final error state.
     *
     * @return string
     * @throws Exception
     */
    private function buildMatchBody(): string
    {
        $result = $this->buildBeforeMatch();

        // Dispatch: every non-default mode gets a guarded jump to its own start state.
        foreach ($this->spec->getModeList() as $mode) {
            if (TokenMatcherInterface::DEFAULT_MODE != $mode) {
                $result .=
                    $this->buildMethodPart("if (\$context->getMode() == '{$mode}') {") .
                    $this->buildFsmEntry($mode, 3) .
                    $this->buildMethodPart("}");
            }
        }

        // State code of all modes; the default mode is additionally preceded by an unconditional jump.
        foreach ($this->spec->getModeList() as $mode) {
            $isDefaultMode = TokenMatcherInterface::DEFAULT_MODE == $mode;
            if ($isDefaultMode) {
                $result .= $this->buildFsmEntry(TokenMatcherInterface::DEFAULT_MODE) . "\n";
            }
            $result .= $this->buildFsmMoves($mode);
        }

        return $result . $this->buildErrorState();
    }
213
214
    /**
     * Builds the preamble of the match() body: context creation followed by the custom
     * "before match" code from the specification.
     *
     * @return string
     */
    private function buildBeforeMatch(): string
    {
        $contextSetup = $this->buildMethodPart("\$context = \$this->createContext(\$buffer, \$tokenFactory);");
        $customCode = $this->buildMethodPart($this->spec->getBeforeMatch());

        return $contextSetup . $customCode;
    }
220
221
    /**
     * Builds the jump to the start state of the given mode's DFA.
     *
     * @param string $mode
     * @param int    $indent Indentation level of the generated statement.
     * @return string
     * @throws Exception
     */
    private function buildFsmEntry(string $mode, int $indent = 2): string
    {
        $startState = $this->getDfa($mode)->getStateMap()->getStartState();
        $label = $this->buildStateLabel('state', $mode, $startState);

        return $this->buildMethodPart("goto {$label};", $indent);
    }
233
234
    /**
     * Builds a label name of the form <prefix><Mode><state>; the mode part is omitted for the default mode.
     *
     * @param string $prefix
     * @param string $mode
     * @param int    $state
     * @return string
     */
    private function buildStateLabel(string $prefix, string $mode, int $state): string
    {
        $modeSuffix = '';
        if (TokenMatcherInterface::DEFAULT_MODE != $mode) {
            $modeSuffix = ucfirst($mode);
        }

        return $prefix . $modeSuffix . $state;
    }
242
243
    /**
     * Builds the code of all states of the given mode's DFA.
     *
     * Finish states that are entered by exactly one transition and have no exits get no code of
     * their own here; their token code is emitted inside the entering transition instead.
     *
     * @param string $mode
     * @return string
     * @throws Exception
     */
    private function buildFsmMoves(string $mode): string
    {
        $stateCodeList = [];
        foreach ($this->getDfa($mode)->getStateMap()->getStateList() as $stateIn) {
            if (!$this->isFinishStateWithSingleEnteringTransition($mode, $stateIn)) {
                $stateCodeList[] =
                    $this->buildStateEntry($mode, $stateIn) .
                    $this->buildStateTransitionList($mode, $stateIn) .
                    $this->buildStateFinish($mode, $stateIn);
            }
        }

        return implode('', $stateCodeList);
    }
263
264
    /**
     * Builds the beginning of a state: its label and, when the state has outgoing transitions,
     * the end-of-buffer check followed by reading the current symbol.
     *
     * @param string $mode
     * @param int    $stateIn
     * @return string
     * @throws Exception
     */
    private function buildStateEntry(string $mode, int $stateIn): string
    {
        $stateLabel = $this->buildStateLabel('state', $mode, $stateIn);
        $result = $this->buildMethodPart("{$stateLabel}:");

        $dfa = $this->getDfa($mode);
        if (empty($dfa->getTransitionMap()->getExitList($stateIn))) {
            // No exits: nothing to read, only the label is needed.
            return $result;
        }

        // At the end of the buffer a finish state produces its token, any other state fails.
        if ($dfa->getStateMap()->isFinishState($stateIn)) {
            $endOfBufferTarget = $this->buildStateLabel('finish', $mode, $stateIn);
        } else {
            $endOfBufferTarget = 'error';
        }

        return
            $result .
            $this->buildMethodPart("if (\$context->getBuffer()->isEnd()) {") .
            $this->buildMethodPart("goto {$endOfBufferTarget};", 3) .
            $this->buildMethodPart("}") .
            $this->buildMethodPart("\$char = \$context->getBuffer()->getSymbol();");
    }
288
289
    /**
     * Builds one conditional block per outgoing transition symbol of the given state.
     *
     * Each block runs the custom "on transition" code, advances the buffer and then either
     * emits the token code directly or jumps to the target state.
     *
     * @param string $mode
     * @param int    $stateIn
     * @return string
     * @throws Exception
     */
    private function buildStateTransitionList(string $mode, int $stateIn): string
    {
        $result = '';
        $exitList = $this->getDfa($mode)->getTransitionMap()->getExitList($stateIn);
        foreach ($exitList as $stateOut => $symbolList) {
            foreach ($symbolList as $symbol) {
                $condition = $this->buildRangeSetCondition($mode, $symbol);
                $result .=
                    $this->buildMethodPart("if ({$condition}) {") .
                    $this->buildOnTransition() .
                    $this->buildMethodPart("\$context->getBuffer()->nextSymbol();", 3);
                if ($this->isFinishStateWithSingleEnteringTransition($mode, $stateOut)) {
                    // Target state has no code of its own, so its token is produced right here.
                    $result .= $this->buildToken($mode, $stateOut, 3);
                } else {
                    $result .= $this->buildStateTransition($mode, $stateOut, 3);
                }
                $result .= $this->buildMethodPart("}");
            }
        }

        return $result;
    }
313
314
    /**
     * Builds the jump to the given target state.
     *
     * @param string $mode
     * @param int    $stateOut
     * @param int    $indent Indentation level of the generated statement.
     * @return string
     */
    private function buildStateTransition(string $mode, int $stateOut, int $indent = 3): string
    {
        $targetLabel = $this->buildStateLabel('state', $mode, $stateOut);

        return $this->buildMethodPart("goto {$targetLabel};", $indent);
    }
324
325
    /**
     * Tells whether the state is a finish state without exits that is entered from exactly one
     * state by exactly one symbol.
     *
     * @param string $mode
     * @param int    $stateOut
     * @return bool
     * @throws Exception
     */
    private function isFinishStateWithSingleEnteringTransition(string $mode, int $stateOut): bool
    {
        $dfa = $this->getDfa($mode);
        if (!$dfa->getStateMap()->isFinishState($stateOut)) {
            return false;
        }

        $enters = $dfa->getTransitionMap()->getEnterList($stateOut);
        $exits = $dfa->getTransitionMap()->getExitList($stateOut);
        if (count($enters) != 1 || count($exits) != 0) {
            return false;
        }

        // Exactly one entering state is left; check how many symbols lead from it.
        return count(array_pop($enters)) == 1;
    }
345
346
    /**
     * Formats an integer as an upper-case hexadecimal literal with an even number of digits.
     *
     * @param int $char
     * @return string
     */
    private function buildHex(int $char): string
    {
        $digits = strtoupper(dechex($char));
        // Round the length up to the next even number; str_pad() adds the leading zero if needed.
        $evenLength = strlen($digits) + strlen($digits) % 2;

        return '0x' . str_pad($digits, $evenLength, '0', STR_PAD_LEFT);
    }
355
356
    /**
     * Builds the list of conditions that test $char against a single range.
     *
     * A one-symbol range yields one equality test, a two-symbol range yields two equality
     * tests, any longer range yields a single interval test.
     *
     * @param Range $range
     * @return string[]
     */
    private function buildRangeCondition(Range $range): array
    {
        $start = $range->getStart();
        $finish = $range->getFinish();

        $startChar = $this->buildHex($start);
        if ($start == $finish) {
            return ["{$startChar} == \$char"];
        }

        $finishChar = $this->buildHex($finish);
        if ($start + 1 != $finish) {
            return ["{$startChar} <= \$char && \$char <= {$finishChar}"];
        }

        return [
            "{$startChar} == \$char",
            "{$finishChar} == \$char",
        ];
    }
372
373
    /**
     * Builds the condition that tests $char against all ranges of the given symbol.
     *
     * A short or single-part condition is returned inline; a longer one is split over several
     * lines; a condition with more than 10 parts is registered as a separate helper method and
     * a call to that method is returned instead.
     *
     * @param string $mode
     * @param int    $symbol
     * @return string
     * @throws Exception
     */
    private function buildRangeSetCondition(string $mode, int $symbol): string
    {
        $rangeSet = $this->getDfa($mode)->getSymbolTable()->getRangeSet($symbol);

        $conditionList = [];
        foreach ($rangeSet->getRanges() as $range) {
            foreach ($this->buildRangeCondition($range) as $condition) {
                $conditionList[] = $condition;
            }
        }
        $conditionCount = count($conditionList);

        $inline = implode(" || ", $conditionList);
        if ($conditionCount == 1 || strlen($inline) + 15 <= 120) {
            return ltrim($inline);
        }

        $multiline = $this->buildMethodPart(implode(" ||\n", $conditionList), 1);
        if ($conditionCount <= 10) {
            return "\n    " . ltrim($multiline);
        }

        // Too many parts for the match() body: move the condition into its own method.
        $method = "isMode" . ucfirst($mode) . "Symbol{$symbol}";
        $this->conditionFunctions[$method] = $multiline;

        return "\$this->{$method}(\$char)";
    }
401
402
    /**
     * Renders all registered condition helpers as private methods of the generated class.
     *
     * @return string Empty string when no helpers were registered.
     */
    private function buildConditionFunctions(): string
    {
        $functionList = [];
        foreach ($this->conditionFunctions as $method => $conditionCode) {
            $signature = "\n    private function {$method}(int \$char): bool\n    {\n";
            $body =
                $this->buildMethodPart("return") .
                $this->buildMethodPart(rtrim($conditionCode) . ';');
            $functionList[] = $signature . $body . "    }\n";
        }

        return implode('', $functionList);
    }
416
417
    /**
     * Builds the end of a state: a jump to the error state for non-finish states, or the token
     * code for finish states (preceded by a "finish" label when the state has outgoing transitions).
     *
     * @param string $mode
     * @param int    $stateIn
     * @return string
     * @throws Exception
     */
    private function buildStateFinish(string $mode, int $stateIn): string
    {
        $dfa = $this->getDfa($mode);
        if (!$dfa->getStateMap()->isFinishState($stateIn)) {
            return $this->buildMethodPart("goto error;\n");
        }

        $finishLabel = '';
        if (!empty($dfa->getTransitionMap()->getExitList($stateIn))) {
            // The label is the jump target of the end-of-buffer check built in buildStateEntry().
            $finishLabel = $this->buildMethodPart("{$this->buildStateLabel('finish', $mode, $stateIn)}:");
        }

        return $finishLabel . $this->buildToken($mode, $stateIn) . "\n";
    }
436
437
    /**
     * Builds the token code for a finish state: a comment with the matched regular expression
     * followed by the code of the corresponding token specification.
     *
     * @param string $mode
     * @param int    $stateIn
     * @param int    $indent Indentation level of the generated code.
     * @return string
     * @throws Exception When no regular expression is registered for the state.
     */
    private function buildToken(string $mode, int $stateIn, int $indent = 2): string
    {
        $regExp = $this->regExpFinishMap[$mode][$stateIn] ?? null;
        if (null === $regExp) {
            throw new Exception("No regular expressions found for state {$mode}:{$stateIn}");
        }
        $tokenSpec = $this->spec->getTokenSpec($mode, $regExp);
        $regExpComment = $this->buildMethodPart("// {$tokenSpec->getRegExp()}", $indent);

        return $regExpComment . $this->buildSingleToken($tokenSpec, $indent);
    }
455
456
    /**
     * Builds the code executed when a token is recognised: the token's own code, the custom
     * "on token" code, an empty line and a successful return.
     *
     * @param TokenSpec $tokenSpec
     * @param int       $indent Indentation level of the generated code.
     * @return string
     */
    private function buildSingleToken(TokenSpec $tokenSpec, int $indent): string
    {
        $tokenCode = $this->buildMethodPart($tokenSpec->getCode(), $indent);
        $onTokenCode = $this->buildOnToken($indent);
        $returnCode = $this->buildMethodPart("return true;", $indent);

        return "{$tokenCode}{$onTokenCode}\n{$returnCode}";
    }
463
464
    /**
     * Builds the error state: its label followed by the custom "on error" code, or by
     * "return false;" when the specification provides no such code.
     *
     * @return string
     */
    private function buildErrorState(): string
    {
        $code = $this->spec->getOnError();
        if ('' == $code) {
            $code = "return false;";
        }

        return $this->buildMethodPart("error:") . $this->buildMethodPart($code);
    }
472
473
    /**
     * Indents every line of the given code and terminates it with a line feed.
     *
     * Trailing whitespace is removed from each line, so empty lines stay empty.
     *
     * @param string $code   Code to format; an empty string yields an empty result.
     * @param int    $indent Number of 4-space indentation steps.
     * @return string
     */
    private function buildMethodPart(string $code, int $indent = 2): string
    {
        if ('' == $code) {
            return '';
        }

        // max() keeps a non-positive indent equivalent to "no indentation".
        $padding = str_repeat("    ", max(0, $indent));
        $result = '';
        foreach (explode("\n", $code) as $codeLine) {
            $result .= rtrim($padding . $codeLine) . "\n";
        }

        return $result;
    }
490
491
    /**
     * Formats the custom "on transition" code from the specification for use inside a transition block.
     *
     * @return string
     */
    private function buildOnTransition(): string
    {
        $onTransitionCode = $this->spec->getOnTransition();

        return $this->buildMethodPart($onTransitionCode, 3);
    }
495
496
    /**
     * Formats the custom "on token" code from the specification.
     *
     * @param int $indent Indentation level of the generated code.
     * @return string
     */
    private function buildOnToken(int $indent = 2): string
    {
        $onTokenCode = $this->spec->getOnToken();

        return $this->buildMethodPart($onTokenCode, $indent);
    }
500
501
    /**
     * Returns the DFA of the given mode, building it on first access and caching it afterwards.
     *
     * @param string $context Mode name.
     * @return Dfa
     * @throws Exception
     */
    private function getDfa(string $context): Dfa
    {
        if (isset($this->dfa[$context])) {
            return $this->dfa[$context];
        }

        $dfa = $this->buildDfa($context);
        $this->dfa[$context] = $dfa;

        return $dfa;
    }
514
515
    /**
     * Builds the DFA that recognises all tokens of the given mode.
     *
     * Steps:
     *  1. An independent DFA is built for every token regular expression.
     *  2. All these DFAs are copied into one joined NFA whose start state has an epsilon
     *     transition to the copy of every DFA's start state.
     *  3. The joined NFA is converted to the resulting DFA.
     *  4. Every finish state of the resulting DFA is mapped to the first regular expression
     *     (in specification order) that owns one of the NFA finish states it contains.
     *
     * The map built in step 4 is stored in $this->regExpFinishMap for the mode.
     *
     * @param string $mode
     * @return Dfa
     * @throws Exception When some token regular expression is not mapped to any finish state.
     */
    private function buildDfa(string $mode): Dfa
    {
        // NOTE(review): $nfa, $existingStates, $regExpStates and $nfaRegExpMap are never read after this
        // loop in this method; it looks like only $dfaList is needed - confirm before removing.
        $nfa = new Nfa();
        $startState = $nfa->getStateMap()->createState();
        $nfa->getStateMap()->addStartState($startState);
        $nfaRegExpMap = [];
        /** @var Dfa[] $dfaList */
        $dfaList = [];
        foreach ($this->spec->getTokenSpecList($mode) as $tokenSpec) {
            $existingStates = $nfa->getStateMap()->getStateList();
            $regExpEntryState = $nfa->getStateMap()->createState();
            $nfa
                ->getEpsilonTransitionMap()
                ->addTransition($startState, $regExpEntryState, true);
            $this->buildRegExp($nfa, $regExpEntryState, $tokenSpec->getRegExp());
            $regExpStates = array_diff($nfa->getStateMap()->getStateList(), $existingStates);
            $nfaRegExpMap[$tokenSpec->getRegExp()] = $regExpStates;
            $dfaList[$tokenSpec->getRegExp()] = $this->buildIndependentRegExp($tokenSpec->getRegExp());
        }

        // Join the independent DFAs into a single NFA with a shared start state.
        $joinedNfa = new Nfa();
        $startState = $joinedNfa->getStateMap()->createState();
        $joinedNfa->getStateMap()->addStartState($startState);
        $languageBuilder = LanguageBuilder::forNfa($joinedNfa);
        // Maps states of the DFA currently being copied to states of the joined NFA.
        $joinedNfaStates = [];
        $nfaRegExpMap = [];
        // Regular expression => finish states of its copy inside the joined NFA.
        $regExpFinishMap = [];
        foreach ($dfaList as $regExp => $dfa) {
            $nfaRegExpMap[$regExp] = [];
            foreach ($dfa->getStateMap()->getStateList() as $dfaState) {
                $nfaState = $joinedNfa->getStateMap()->createState();
                $nfaRegExpMap[$regExp][] = $nfaState;
                $joinedNfaStates[$dfaState] = $nfaState;
                if ($dfa->getStateMap()->isStartState($dfaState)) {
                    $joinedNfa->getEpsilonTransitionMap()->addTransition($startState, $nfaState, true);
                }
                if ($dfa->getStateMap()->isFinishState($dfaState)) {
                    $regExpFinishMap[$regExp][] = $nfaState;
                    $joinedNfa->getStateMap()->addFinishState($nfaState);
                }
            }
            foreach ($dfa->getTransitionMap()->getTransitionList() as $dfaStateIn => $transitions) {
                foreach ($transitions as $dfaStateOut => $symbols) {
                    foreach ($symbols as $symbol) {
                        // Symbols are local to each DFA, so they are re-registered in the joined language
                        // and merged with the symbols already attached to the same transition.
                        $rangeSet = $dfa->getSymbolTable()->getRangeSet($symbol);
                        $newSymbols = $languageBuilder->getSymbolList(...$rangeSet->getRanges());
                        $oldSymbols = $joinedNfa
                            ->getSymbolTransitionMap()
                            ->transitionExists($joinedNfaStates[$dfaStateIn], $joinedNfaStates[$dfaStateOut])
                            ? $joinedNfa
                                ->getSymbolTransitionMap()
                                ->getTransition($joinedNfaStates[$dfaStateIn], $joinedNfaStates[$dfaStateOut])
                            : [];
                        $joinedNfa->getSymbolTransitionMap()->replaceTransition(
                            $joinedNfaStates[$dfaStateIn],
                            $joinedNfaStates[$dfaStateOut],
                            array_unique(array_merge($oldSymbols, $newSymbols))
                        );
                    }
                }
            }
        }

        $dfa = new Dfa();
        (new DfaBuilder($dfa, $joinedNfa))->run();

        // Resulting DFA finish state => regular expression recognised in that state.
        $dfaRegExpFinishMap = [];
        foreach ($dfa->getStateMap()->getFinishStateList() as $dfaFinishState) {
            $nfaFinishStates = array_intersect(
                $dfa->getStateMap()->getStateValue($dfaFinishState),
                $joinedNfa->getStateMap()->getFinishStateList()
            );
            foreach ($regExpFinishMap as $regExp => $regExpFinishStates) {
                foreach ($nfaFinishStates as $nfaFinishState) {
                    if (in_array($nfaFinishState, $regExpFinishStates)) {
                        // Array keys that look like integers are stored as int, hence the cast back to string.
                        $dfaRegExpFinishMap[$dfaFinishState] = (string) $regExp;
                        // The first matching regular expression wins; continue with the next finish state.
                        break 2;
                    }
                }
            }
        }
        foreach ($this->spec->getTokenSpecList($mode) as $tokenSpec) {
            // Strict comparison is required: with loose comparison numeric-looking patterns
            // (e.g. "0" and "00") are treated as equal and an unreachable token stays unnoticed.
            if (!in_array($tokenSpec->getRegExp(), $dfaRegExpFinishMap, true)) {
                throw new Exception("Token not reachable for regular expression: {$tokenSpec->getRegExp()} ");
            }
        }
        $this->regExpFinishMap[$mode] = $dfaRegExpFinishMap;

        return $dfa;
    }
610
611
    /**
     * Parses the regular expression and translates it into the given NFA, starting from the given state.
     *
     * @param Nfa    $nfa
     * @param int    $entryState State of $nfa used as the start state by the NFA builder.
     * @param string $regExp
     * @throws Exception
     */
    private function buildRegExp(Nfa $nfa, int $entryState, string $regExp): void
    {
        $buffer = CharBufferFactory::createFromString($regExp);
        $tree = new Tree();
        $parser = ParserFactory::createFromBuffer($tree, $buffer);
        $parser->run();

        $nfaBuilder = new NfaBuilder($nfa, PropertyLoader::create());
        $nfaBuilder->setStartState($entryState);
        $translator = new Translator($tree, $nfaBuilder);
        $translator->run();
    }
626
627
    /**
     * Builds a standalone DFA for a single regular expression.
     *
     * @param string $regExp
     * @return Dfa
     * @throws Exception
     */
    private function buildIndependentRegExp(string $regExp): Dfa
    {
        $buffer = CharBufferFactory::createFromString($regExp);
        $tree = new Tree();
        $parser = ParserFactory::createFromBuffer($tree, $buffer);
        $parser->run();

        // The expression gets an NFA of its own, which is then converted to a DFA.
        $nfa = new Nfa();
        $nfaBuilder = new NfaBuilder($nfa, PropertyLoader::create());
        $translator = new Translator($tree, $nfaBuilder);
        $translator->run();

        return DfaBuilder::fromNfa($nfa);
    }
643
}
644