TokenMatcherGenerator::buildDfa()   F
last analyzed

Complexity

Conditions 16
Paths 780

Size

Total Lines 89
Code Lines 65

Duplication

Lines 0
Ratio 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
cc 16
eloc 65
c 3
b 0
f 0
nc 780
nop 1
dl 0
loc 89
rs 1.7055

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Remorhaz\UniLex\Lexer;
6
7
use ReflectionClass;
8
use ReflectionException;
9
use Remorhaz\IntRangeSets\RangeInterface;
10
use Remorhaz\UCD\PropertyRangeLoader;
11
use Remorhaz\UniLex\AST\Translator;
12
use Remorhaz\UniLex\AST\Tree;
13
use Remorhaz\UniLex\Exception;
14
use Remorhaz\UniLex\RegExp\FSM\Dfa;
15
use Remorhaz\UniLex\RegExp\FSM\DfaBuilder;
16
use Remorhaz\UniLex\RegExp\FSM\LanguageBuilder;
17
use Remorhaz\UniLex\RegExp\FSM\Nfa;
18
use Remorhaz\UniLex\RegExp\FSM\NfaBuilder;
19
use Remorhaz\UniLex\RegExp\ParserFactory;
20
use Remorhaz\UniLex\Unicode\CharBufferFactory;
21
use Throwable;
22
23
use function array_diff;
24
use function array_intersect;
25
use function array_merge;
26
use function array_pop;
27
use function array_unique;
28
use function count;
29
use function implode;
30
use function in_array;
31
32
class TokenMatcherGenerator
33
{
34
    /**
     * Specification the matcher source is generated from.
     *
     * @var TokenMatcherSpec
     */
    private $spec;

    /**
     * Generated class source cached by getOutput(); stays unset until the first build.
     *
     * @var string|null
     */
    private $output;

    /**
     * DFAs cached by getDfa(), keyed by mode name.
     *
     * @var Dfa[]|null
     */
    private $dfa;

    /**
     * Per-mode map from DFA finish state to the regular expression it accepts; filled by buildDfa().
     *
     * @var array<string, array<int, string>>
     */
    private $regExpFinishMap = [];

    /**
     * Bodies of generated condition helper methods, keyed by helper method name.
     *
     * @var array<string, string>
     */
    private $conditionFunctions = [];

    /**
     * @param TokenMatcherSpec $spec Specification describing the matcher class to generate.
     */
    public function __construct(TokenMatcherSpec $spec)
    {
        $this->spec = $spec;
    }
48
49
    /**
     * Renders the complete source of the target matcher class (without the opening PHP tag).
     *
     * @return string
     * @throws Exception
     * @throws ReflectionException
     */
    private function buildOutput(): string
    {
        // Condition helpers are registered while the match body is rendered, so start from a clean slate.
        $this->conditionFunctions = [];

        $fileComment = $this->buildFileComment();
        $header = $this->buildHeader();
        $targetName = $this->spec->getTargetShortName();
        $templateName = $this->spec->getTemplateClass()->getShortName();
        $matchParameters = $this->buildMatchParameters();
        $matchBody = $this->buildMatchBody();
        // Must be rendered after the match body: buildRangeSetCondition() fills the helper list as a side effect.
        $conditionFunctions = $this->buildConditionFunctions();

        $source = "{$fileComment}\ndeclare(strict_types=1);\n\n";
        $source .= "{$header}\n";
        $source .= "class {$targetName} extends {$templateName}\n";
        $source .= "{\n";
        $source .= "    public function match({$matchParameters}): bool\n";
        $source .= "    {\n{$matchBody}";
        $source .= "    }\n";
        $source .= $conditionFunctions;
        $source .= "}\n";

        return $source;
    }
69
70
    /**
     * Returns an instance of the target matcher class, generating and evaluating its source
     * first when the class is not defined yet.
     *
     * @return TokenMatcherInterface
     * @throws Exception When the generated source fails to evaluate or does not define the target class.
     */
    public function load(): TokenMatcherInterface
    {
        $targetClass = $this->spec->getTargetClassName();
        if (class_exists($targetClass)) {
            return new $targetClass();
        }

        try {
            // NOTE(review): eval() runs generated source; it is only as trustworthy as the code
            // snippets and regular expressions supplied through the specification.
            eval($this->getOutput(false));
        } catch (Throwable $e) {
            throw new Exception("Invalid PHP code generated", 0, $e);
        }

        if (!class_exists($targetClass)) {
            throw new Exception("Failed to generate target class");
        }

        return new $targetClass();
    }
91
92
    /**
     * Returns the generated class source, building it on first use and caching it afterwards.
     *
     * @param bool $asFile When true, the source is prefixed with the opening PHP tag.
     * @return string
     * @throws Exception
     * @throws ReflectionException
     */
    public function getOutput(bool $asFile = true): string
    {
        $this->output = $this->output ?? $this->buildOutput();

        if ($asFile) {
            return "<?php\n\n{$this->output}";
        }

        return $this->output;
    }
106
107
    /**
     * Wraps the file comment from the specification into a docblock.
     *
     * @return string Docblock ending with a newline, or an empty string when there is no comment.
     */
    private function buildFileComment(): string
    {
        $content = $this->spec->getFileComment();
        if ('' == $content) {
            return '';
        }

        $docLines = ["/**"];
        foreach (explode("\n", $content) as $text) {
            // rtrim() prevents trailing whitespace on empty comment lines.
            $docLines[] = rtrim(" * {$text}");
        }
        $docLines[] = " */";

        return implode("\n", $docLines) . "\n";
    }
122
123
    /**
     * Builds the part of the file between the strict-types declaration and the class:
     * namespace statement, use list and custom header code, each included only when non-empty.
     *
     * @return string
     * @throws ReflectionException
     */
    public function buildHeader(): string
    {
        $sections = [];

        $namespace = $this->spec->getTargetNamespaceName();
        if ($namespace != '') {
            $sections[] = $this->buildMethodPart("namespace {$namespace};", 0);
        }

        $optionalSections = [
            $this->buildUseList(),
            $this->buildMethodPart($this->spec->getHeader(), 0),
        ];
        foreach ($optionalSections as $section) {
            if ('' != $section) {
                $sections[] = $section;
            }
        }

        return implode("\n", $sections);
    }
145
146
    /**
     * Builds the list of `use` statements for the classes registered in the specification.
     *
     * Entries with a string key are treated as aliased imports and rendered as
     * `use ClassName as Alias;`; entries with an integer key are imported without alias.
     *
     * @return string
     * @throws ReflectionException
     */
    private function buildUseList(): string
    {
        $result = '';
        foreach ($this->spec->getUsedClassList() as $alias => $className) {
            // PHP requires the `as` keyword between the class name and its alias.
            $classWithAlias = is_string($alias) ? "{$className} as {$alias}" : $className;
            $result .= $this->buildMethodPart("use {$classWithAlias};", 0);
        }

        return $result;
    }
160
161
    /**
     * Builds the parameter list of the generated match() method from the reflected template method.
     *
     * Only named types are rendered: built-in types by name, class types by short name.
     * Nullable types keep their `?` marker so the generated override stays compatible with
     * the parent signature. Parameters with any other kind of type (e.g. union types) are
     * rendered untyped, which is always a valid widening in an overriding method.
     *
     * @return string
     * @throws ReflectionException
     */
    private function buildMatchParameters(): string
    {
        $paramList = [];
        foreach ($this->spec->getMatchMethod()->getParameters() as $matchParameter) {
            $param = "";
            $type = $matchParameter->getType();
            // getName() exists only on ReflectionNamedType, not on ReflectionType in general.
            if ($type instanceof \ReflectionNamedType) {
                $typeName = $type->getName();
                $shortName = $type->isBuiltin()
                    ? $typeName
                    : (new ReflectionClass($typeName))->getShortName();
                // `mixed` and `null` already include null and cannot take the `?` marker.
                $isNullable = $type->allowsNull()
                    && !in_array(strtolower($typeName), ['mixed', 'null'], true);
                $param = ($isNullable ? "?" : "") . "{$shortName} ";
            }
            $param .= "\${$matchParameter->getName()}";
            $paramList[] = $param;
        }

        return implode(", ", $paramList);
    }
183
184
    /**
     * Builds the body of the generated match() method: preamble, per-mode dispatch,
     * state machine code for every mode and the final error label.
     *
     * @return string
     * @throws Exception
     */
    private function buildMatchBody(): string
    {
        $body = $this->buildBeforeMatch();

        // Non-default modes are entered through an explicit check of the context mode.
        foreach ($this->spec->getModeList() as $mode) {
            if (TokenMatcherInterface::DEFAULT_MODE != $mode) {
                $body .= $this->buildMethodPart("if (\$context->getMode() == '{$mode}') {");
                $body .= $this->buildFsmEntry($mode, 3);
                $body .= $this->buildMethodPart("}");
            }
        }

        // The default mode gets an unconditional jump emitted right before its own states.
        foreach ($this->spec->getModeList() as $mode) {
            if (TokenMatcherInterface::DEFAULT_MODE == $mode) {
                $body .= $this->buildFsmEntry(TokenMatcherInterface::DEFAULT_MODE) . "\n";
            }
            $body .= $this->buildFsmMoves($mode);
        }

        return $body . $this->buildErrorState();
    }
212
213
    /**
     * Builds the preamble of match(): context creation followed by the custom
     * "before match" code from the specification.
     *
     * @return string
     */
    private function buildBeforeMatch(): string
    {
        $contextSetup = $this->buildMethodPart("\$context = \$this->createContext(\$buffer, \$tokenFactory);");
        $customCode = $this->buildMethodPart($this->spec->getBeforeMatch());

        return $contextSetup . $customCode;
    }
219
220
    /**
     * Builds the jump to the start state of the given mode's DFA.
     *
     * @param string $mode
     * @param int    $indent Indentation level of the generated line.
     * @return string
     * @throws Exception
     */
    private function buildFsmEntry(string $mode, int $indent = 2): string
    {
        $startState = $this->getDfa($mode)->getStateMap()->getStartState();
        $label = $this->buildStateLabel('state', $mode, $startState);

        return $this->buildMethodPart("goto {$label};", $indent);
    }
232
233
    /**
     * Builds a label name from prefix, mode and state number. The default mode adds
     * nothing to the name; any other mode is inserted with its first letter upper-cased.
     *
     * @param string $prefix
     * @param string $mode
     * @param int    $state
     * @return string
     */
    private function buildStateLabel(string $prefix, string $mode, int $state): string
    {
        if (TokenMatcherInterface::DEFAULT_MODE == $mode) {
            return "{$prefix}{$state}";
        }

        return $prefix . ucfirst($mode) . $state;
    }
241
242
    /**
     * Builds the code for all states of the given mode's DFA. Finish states that are
     * reached by a single transition and have no exits are skipped here, because their
     * token code is emitted inline at the transition.
     *
     * @param string $mode
     * @return string
     * @throws Exception
     */
    private function buildFsmMoves(string $mode): string
    {
        $chunks = [];
        foreach ($this->getDfa($mode)->getStateMap()->getStateList() as $stateIn) {
            if (!$this->isFinishStateWithSingleEnteringTransition($mode, $stateIn)) {
                $chunks[] = $this->buildStateEntry($mode, $stateIn);
                $chunks[] = $this->buildStateTransitionList($mode, $stateIn);
                $chunks[] = $this->buildStateFinish($mode, $stateIn);
            }
        }

        return implode('', $chunks);
    }
262
263
    /**
     * Builds the label of a state and, when the state has outgoing transitions, the
     * end-of-buffer check and the read of the current symbol.
     *
     * @param string $mode
     * @param int    $stateIn
     * @return string
     * @throws Exception
     */
    private function buildStateEntry(string $mode, int $stateIn): string
    {
        $stateLabel = $this->buildMethodPart("{$this->buildStateLabel('state', $mode, $stateIn)}:");

        $dfa = $this->getDfa($mode);
        if (empty($dfa->getTransitionMap()->getExitList($stateIn))) {
            // Nothing to read in a state without exits.
            return $stateLabel;
        }

        // At the end of input a finish state still produces its token; any other state fails.
        $jumpOnEnd = $dfa->getStateMap()->isFinishState($stateIn)
            ? "goto {$this->buildStateLabel('finish', $mode, $stateIn)};"
            : "goto error;";

        return
            $stateLabel .
            $this->buildMethodPart("if (\$context->getBuffer()->isEnd()) {") .
            $this->buildMethodPart($jumpOnEnd, 3) .
            $this->buildMethodPart("}") .
            $this->buildMethodPart("\$char = \$context->getBuffer()->getSymbol();");
    }
287
288
    /**
     * Builds one conditional block per symbol of every transition leaving the given state.
     * Each block runs the "on transition" code, advances the buffer and then either emits
     * the token inline or jumps to the next state.
     *
     * @param string $mode
     * @param int    $stateIn
     * @return string
     * @throws Exception
     */
    private function buildStateTransitionList(string $mode, int $stateIn): string
    {
        $exitList = $this->getDfa($mode)->getTransitionMap()->getExitList($stateIn);

        $code = '';
        foreach ($exitList as $stateOut => $symbolList) {
            foreach ($symbolList as $symbol) {
                $condition = $this->buildRangeSetCondition($mode, $symbol);
                $code .= $this->buildMethodPart("if ({$condition}) {");
                $code .= $this->buildOnTransition();
                $code .= $this->buildMethodPart("\$context->getBuffer()->nextSymbol();", 3);
                if ($this->isFinishStateWithSingleEnteringTransition($mode, $stateOut)) {
                    $code .= $this->buildToken($mode, $stateOut, 3);
                } else {
                    $code .= $this->buildStateTransition($mode, $stateOut, 3);
                }
                $code .= $this->buildMethodPart("}");
            }
        }

        return $code;
    }
312
313
    /**
     * Builds the jump to the label of the given target state.
     *
     * @param string $mode
     * @param int    $stateOut
     * @param int    $indent Indentation level of the generated line.
     * @return string
     */
    private function buildStateTransition(string $mode, int $stateOut, int $indent = 3): string
    {
        $targetLabel = $this->buildStateLabel('state', $mode, $stateOut);

        return $this->buildMethodPart("goto {$targetLabel};", $indent);
    }
323
324
    /**
     * Tells whether the state is a finish state that has no exits and is entered from
     * exactly one state by exactly one symbol.
     *
     * @param string $mode
     * @param int    $stateOut
     * @return bool
     * @throws Exception
     */
    private function isFinishStateWithSingleEnteringTransition(string $mode, int $stateOut): bool
    {
        $dfa = $this->getDfa($mode);
        if (!$dfa->getStateMap()->isFinishState($stateOut)) {
            return false;
        }

        $transitionMap = $dfa->getTransitionMap();
        $enters = $transitionMap->getEnterList($stateOut);
        $exits = $transitionMap->getExitList($stateOut);
        if (count($enters) != 1 || count($exits) != 0) {
            return false;
        }

        // Exactly one entering state is left; check that it enters by a single symbol.
        $enteringSymbols = array_pop($enters);

        return 1 == count($enteringSymbols);
    }
344
345
    /**
     * Formats an integer as an upper-case hexadecimal literal with an even number of digits.
     *
     * @param int $char
     * @return string
     */
    private function buildHex(int $char): string
    {
        $digits = strtoupper(dechex($char));
        // Round the width up to the next even number so digits always come in whole bytes.
        $evenWidth = strlen($digits) + strlen($digits) % 2;

        return '0x' . str_pad($digits, $evenWidth, '0', STR_PAD_LEFT);
    }
354
355
    /**
     * Builds the conditions that test $char against a range: one equality check for a
     * single-value range, two equality checks for a range of two adjacent values, and a
     * bounds check otherwise.
     *
     * @param RangeInterface $range
     * @return string[]
     */
    private function buildRangeCondition(RangeInterface $range): array
    {
        $start = $range->getStart();
        $finish = $range->getFinish();
        $startChar = $this->buildHex($start);
        if ($start == $finish) {
            return ["{$startChar} == \$char"];
        }

        $finishChar = $this->buildHex($finish);
        if ($start + 1 != $finish) {
            return ["{$startChar} <= \$char && \$char <= {$finishChar}"];
        }

        return [
            "{$startChar} == \$char",
            "{$finishChar} == \$char",
        ];
    }
371
372
    /**
     * Builds the expression that tests $char against all ranges of the given symbol.
     *
     * Short expressions (or those with a single condition) are returned on one line.
     * Longer ones are split into one condition per line; with more than ten conditions
     * the expression is moved into a generated helper method and a call to that helper
     * is returned instead.
     *
     * @param string $mode
     * @param int    $symbol
     * @return string
     * @throws Exception
     */
    private function buildRangeSetCondition(string $mode, int $symbol): string
    {
        $conditionList = [];
        $ranges = $this->getDfa($mode)->getSymbolTable()->getRangeSet($symbol)->getRanges();
        foreach ($ranges as $range) {
            foreach ($this->buildRangeCondition($range) as $condition) {
                $conditionList[] = $condition;
            }
        }
        $conditionCount = count($conditionList);

        $singleLine = implode(" || ", $conditionList);
        if ($conditionCount == 1 || strlen($singleLine) + 15 <= 120) {
            return ltrim($singleLine);
        }

        $multiLine = $this->buildMethodPart(implode(" ||\n", $conditionList), 1);
        if ($conditionCount <= 10) {
            return "\n    " . ltrim($multiLine);
        }

        // Too many conditions to inline: register a helper method that buildConditionFunctions() renders later.
        $method = "isMode" . ucfirst($mode) . "Symbol{$symbol}";
        $this->conditionFunctions[$method] = $multiLine;

        return "\$this->{$method}(\$char)";
    }
400
401
    /**
     * Renders the helper methods registered in $conditionFunctions; each one returns
     * the stored condition expression for its $char argument.
     *
     * @return string
     */
    private function buildConditionFunctions(): string
    {
        $methods = [];
        foreach ($this->conditionFunctions as $method => $conditionList) {
            $signature = "\n    private function {$method}(int \$char): bool\n    {\n";
            $body =
                $this->buildMethodPart("return") .
                $this->buildMethodPart(rtrim($conditionList) . ';');
            $methods[] = "{$signature}{$body}    }\n";
        }

        return implode('', $methods);
    }
415
416
    /**
     * Builds the code that follows the transitions of a state: a jump to the error label
     * for non-finish states, or the token code for finish states (preceded by a "finish"
     * label when the state has outgoing transitions).
     *
     * @param string $mode
     * @param int    $stateIn
     * @return string
     * @throws Exception
     */
    private function buildStateFinish(string $mode, int $stateIn): string
    {
        $dfa = $this->getDfa($mode);
        if (!$dfa->getStateMap()->isFinishState($stateIn)) {
            return $this->buildMethodPart("goto error;\n");
        }

        $finishLabel = '';
        $hasExits = !empty($dfa->getTransitionMap()->getExitList($stateIn));
        if ($hasExits) {
            // The label is the jump target of the end-of-buffer check emitted by buildStateEntry().
            $finishLabel = $this->buildMethodPart("{$this->buildStateLabel('finish', $mode, $stateIn)}:");
        }

        return $finishLabel . $this->buildToken($mode, $stateIn) . "\n";
    }
435
436
    /**
     * Builds the token code for a finish state: a comment with the matched regular
     * expression followed by the code of the corresponding token specification.
     *
     * @param string $mode
     * @param int    $stateIn
     * @param int    $indent Indentation level of the generated lines.
     * @return string
     * @throws Exception When no regular expression is mapped to the given state.
     */
    private function buildToken(string $mode, int $stateIn, int $indent = 2): string
    {
        if (!isset($this->regExpFinishMap[$mode][$stateIn])) {
            throw new Exception("No regular expressions found for state {$mode}:{$stateIn}");
        }

        $regExp = $this->regExpFinishMap[$mode][$stateIn];
        $tokenSpec = $this->spec->getTokenSpec($mode, $regExp);
        $comment = $this->buildMethodPart("// {$tokenSpec->getRegExp()}", $indent);

        return $comment . $this->buildSingleToken($tokenSpec, $indent);
    }
454
455
    /**
     * Builds the code executed when a token is matched: the token's own code, the shared
     * "on token" code, a blank line and a successful return.
     *
     * @param TokenSpec $tokenSpec
     * @param int       $indent Indentation level of the generated lines.
     * @return string
     */
    private function buildSingleToken(TokenSpec $tokenSpec, int $indent): string
    {
        $tokenCode = $this->buildMethodPart($tokenSpec->getCode(), $indent);
        $onTokenCode = $this->buildOnToken($indent);
        $returnCode = $this->buildMethodPart("return true;", $indent);

        return "{$tokenCode}{$onTokenCode}\n{$returnCode}";
    }
462
463
    /**
     * Builds the error label followed by the custom error code from the specification,
     * or by a failing return when no custom code is given.
     *
     * @return string
     */
    private function buildErrorState(): string
    {
        $code = $this->spec->getOnError();
        if ('' == $code) {
            $code = "return false;";
        }

        return $this->buildMethodPart("error:") . $this->buildMethodPart($code);
    }
471
472
    /**
     * Indents every line of the given code and terminates each with a newline.
     * Trailing whitespace is stripped, so empty lines stay empty.
     *
     * @param string $code   Code to indent; an empty string yields an empty result.
     * @param int    $indent Number of four-space indentation levels.
     * @return string
     */
    private function buildMethodPart(string $code, int $indent = 2): string
    {
        if ($code === '') {
            return '';
        }

        $padding = str_repeat("    ", max(0, $indent));
        $indentedLines = array_map(
            function (string $codeLine) use ($padding): string {
                return rtrim($padding . $codeLine) . "\n";
            },
            explode("\n", $code)
        );

        return implode('', $indentedLines);
    }
489
490
    /**
     * Builds the custom "on transition" code from the specification at the indentation
     * level used inside transition blocks.
     *
     * @return string
     */
    private function buildOnTransition(): string
    {
        $onTransition = $this->spec->getOnTransition();

        return $this->buildMethodPart($onTransition, 3);
    }
494
495
    /**
     * Builds the custom "on token" code from the specification.
     *
     * @param int $indent Indentation level of the generated lines.
     * @return string
     */
    private function buildOnToken(int $indent = 2): string
    {
        $onToken = $this->spec->getOnToken();

        return $this->buildMethodPart($onToken, $indent);
    }
499
500
    /**
     * Returns the DFA for the given mode, building it on first request and caching it.
     *
     * @param string $context Mode name.
     * @return Dfa
     * @throws Exception
     */
    private function getDfa(string $context): Dfa
    {
        if (isset($this->dfa[$context])) {
            return $this->dfa[$context];
        }

        $dfa = $this->buildDfa($context);
        $this->dfa[$context] = $dfa;

        return $dfa;
    }
513
514
    /**
     * Builds the DFA that recognizes all tokens of the given mode.
     *
     * Every regular expression is first compiled into its own DFA. These DFAs are then
     * joined into a single NFA behind a common start state, and the NFA is converted into
     * the resulting DFA. As a side effect the map from finish states of the resulting DFA
     * to regular expressions is stored in $regExpFinishMap for the mode.
     *
     * @param string $mode
     * @return Dfa
     * @throws Exception When some regular expression has no finish state in the resulting DFA.
     */
    private function buildDfa(string $mode): Dfa
    {
        /** @var Dfa[] $dfaList */
        $dfaList = [];
        foreach ($this->spec->getTokenSpecList($mode) as $tokenSpec) {
            $dfaList[$tokenSpec->getRegExp()] = $this->buildIndependentRegExp($tokenSpec->getRegExp());
        }

        $joinedNfa = new Nfa();
        $regExpFinishMap = $this->joinRegExpDfaList($joinedNfa, $dfaList);

        $dfa = new Dfa();
        (new DfaBuilder($dfa, $joinedNfa))->run();

        $dfaRegExpFinishMap = $this->mapFinishStatesToRegExps($dfa, $joinedNfa, $regExpFinishMap);
        foreach ($this->spec->getTokenSpecList($mode) as $tokenSpec) {
            // Strict comparison: numeric-looking patterns such as "0" and "00" must not be treated as equal.
            if (!in_array($tokenSpec->getRegExp(), $dfaRegExpFinishMap, true)) {
                throw new Exception("Token not reachable for regular expression: {$tokenSpec->getRegExp()} ");
            }
        }
        $this->regExpFinishMap[$mode] = $dfaRegExpFinishMap;

        return $dfa;
    }

    /**
     * Copies all given DFAs into the NFA, connecting a new common start state to the
     * start state of every copy with an epsilon transition.
     *
     * @param Nfa   $joinedNfa Empty NFA that receives the copies.
     * @param Dfa[] $dfaList   DFAs keyed by regular expression.
     * @return array<int|string, int[]> NFA finish states of every regular expression.
     * @throws Exception
     */
    private function joinRegExpDfaList(Nfa $joinedNfa, array $dfaList): array
    {
        $startState = $joinedNfa->getStateMap()->createState();
        $joinedNfa->getStateMap()->addStartState($startState);
        $languageBuilder = LanguageBuilder::forNfa($joinedNfa);

        $regExpFinishMap = [];
        foreach ($dfaList as $regExp => $dfa) {
            $stateMap = $this->importDfaStates($joinedNfa, $startState, $dfa);
            foreach ($stateMap as $dfaState => $nfaState) {
                if ($dfa->getStateMap()->isFinishState($dfaState)) {
                    $regExpFinishMap[$regExp][] = $nfaState;
                }
            }
            $this->importDfaTransitions($joinedNfa, $languageBuilder, $dfa, $stateMap);
        }

        return $regExpFinishMap;
    }

    /**
     * Creates an NFA state for every state of the DFA, linking copies of start states to
     * the common start state and marking copies of finish states as finish states.
     *
     * @param Nfa $joinedNfa
     * @param int $startState Common start state of the NFA.
     * @param Dfa $dfa
     * @return array<int, int> Map from DFA state to the NFA state created for it.
     * @throws Exception
     */
    private function importDfaStates(Nfa $joinedNfa, int $startState, Dfa $dfa): array
    {
        $stateMap = [];
        foreach ($dfa->getStateMap()->getStateList() as $dfaState) {
            $nfaState = $joinedNfa->getStateMap()->createState();
            $stateMap[$dfaState] = $nfaState;
            if ($dfa->getStateMap()->isStartState($dfaState)) {
                $joinedNfa->getEpsilonTransitionMap()->addTransition($startState, $nfaState, true);
            }
            if ($dfa->getStateMap()->isFinishState($dfaState)) {
                $joinedNfa->getStateMap()->addFinishState($nfaState);
            }
        }

        return $stateMap;
    }

    /**
     * Copies the symbol transitions of the DFA into the NFA, translating every DFA symbol
     * into symbols of the NFA's language and merging them with symbols already present
     * on the same transition.
     *
     * @param Nfa             $joinedNfa
     * @param LanguageBuilder $languageBuilder Language builder created for $joinedNfa.
     * @param Dfa             $dfa
     * @param array<int, int> $stateMap        Map from DFA state to NFA state.
     * @throws Exception
     */
    private function importDfaTransitions(
        Nfa $joinedNfa,
        LanguageBuilder $languageBuilder,
        Dfa $dfa,
        array $stateMap
    ): void {
        foreach ($dfa->getTransitionMap()->getTransitionList() as $dfaStateIn => $transitions) {
            foreach ($transitions as $dfaStateOut => $symbols) {
                $nfaStateIn = $stateMap[$dfaStateIn];
                $nfaStateOut = $stateMap[$dfaStateOut];
                foreach ($symbols as $symbol) {
                    $rangeSet = $dfa->getSymbolTable()->getRangeSet($symbol);
                    $newSymbols = $languageBuilder->getSymbolList(...$rangeSet->getRanges());
                    $oldSymbols = $joinedNfa->getSymbolTransitionMap()->transitionExists($nfaStateIn, $nfaStateOut)
                        ? $joinedNfa->getSymbolTransitionMap()->getTransition($nfaStateIn, $nfaStateOut)
                        : [];
                    $joinedNfa->getSymbolTransitionMap()->replaceTransition(
                        $nfaStateIn,
                        $nfaStateOut,
                        array_unique(array_merge($oldSymbols, $newSymbols))
                    );
                }
            }
        }
    }

    /**
     * Maps every finish state of the DFA to a regular expression. When a finish state
     * contains NFA finish states of several regular expressions, the one that comes
     * first in $regExpFinishMap wins.
     *
     * @param Dfa                      $dfa             DFA built from $joinedNfa.
     * @param Nfa                      $joinedNfa
     * @param array<int|string, int[]> $regExpFinishMap NFA finish states of every regular expression.
     * @return array<int, string> Map from DFA finish state to regular expression.
     * @throws Exception
     */
    private function mapFinishStatesToRegExps(Dfa $dfa, Nfa $joinedNfa, array $regExpFinishMap): array
    {
        $dfaRegExpFinishMap = [];
        foreach ($dfa->getStateMap()->getFinishStateList() as $dfaFinishState) {
            $nfaFinishStates = array_intersect(
                $dfa->getStateMap()->getStateValue($dfaFinishState),
                $joinedNfa->getStateMap()->getFinishStateList()
            );
            foreach ($regExpFinishMap as $regExp => $regExpFinishStates) {
                if (!empty(array_intersect($nfaFinishStates, $regExpFinishStates))) {
                    // Array keys that look like integers were converted by PHP, so cast back to string.
                    $dfaRegExpFinishMap[$dfaFinishState] = (string) $regExp;
                    break;
                }
            }
        }

        return $dfaRegExpFinishMap;
    }
609
610
    /**
     * Parses the regular expression and translates it into states and transitions of
     * the given NFA, starting from the given entry state.
     *
     * @param Nfa    $nfa
     * @param int    $entryState
     * @param string $regExp
     * @throws Exception
     */
    private function buildRegExp(Nfa $nfa, int $entryState, string $regExp): void
    {
        $buffer = CharBufferFactory::createFromString($regExp);
        $tree = new Tree();
        $parser = ParserFactory::createFromBuffer($tree, $buffer);
        $parser->run();

        $nfaBuilder = new NfaBuilder($nfa, PropertyRangeLoader::create());
        $nfaBuilder->setStartState($entryState);

        $translator = new Translator($tree, $nfaBuilder);
        $translator->run();
    }
625
626
    /**
     * Compiles a single regular expression into its own DFA: the expression is parsed
     * into a syntax tree, translated into a fresh NFA and converted with DfaBuilder.
     *
     * @param string $regExp
     * @return Dfa
     * @throws Exception
     */
    private function buildIndependentRegExp(string $regExp): Dfa
    {
        $buffer = CharBufferFactory::createFromString($regExp);
        $tree = new Tree();
        $parser = ParserFactory::createFromBuffer($tree, $buffer);
        $parser->run();

        $nfa = new Nfa();
        $translator = new Translator($tree, new NfaBuilder($nfa, PropertyRangeLoader::create()));
        $translator->run();

        return DfaBuilder::fromNfa($nfa);
    }
642
}
643