Passed
Push — master ( 94e87c...9324f8 )
by Edward
03:09
created

PropertyBuilder::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 2
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 4
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Remorhaz\UniLex\RegExp;
6
7
use PhpParser\BuilderFactory;
8
use PhpParser\Comment\Doc;
9
use PhpParser\Node\Scalar\LNumber;
10
use PhpParser\Node\Stmt\Declare_;
11
use PhpParser\Node\Stmt\DeclareDeclare;
12
use PhpParser\Node\Stmt\Return_;
13
use ReflectionClass;
14
use ReflectionException;
15
use Remorhaz\UniLex\Console\PrettyPrinter;
16
use Remorhaz\UniLex\Exception as UniLexException;
17
use Remorhaz\UniLex\RegExp\FSM\Range;
18
use Remorhaz\UniLex\RegExp\FSM\RangeSet;
19
use RuntimeException;
20
use SplFileObject;
21
22
use function explode;
23
use function file_put_contents;
24
use function hexdec;
25
use function preg_match;
26
use function trim;
27
28
final class PropertyBuilder
29
{
30
31
    /**
     * Directory (appended to this file's directory) that receives the generated property files.
     */
    private const PROP_DIR = '/Properties';

    /**
     * Factory used to assemble the PHP AST nodes of the generated files.
     *
     * @var BuilderFactory
     */
    private $phpBuilder;

    /**
     * Printer that turns the assembled AST nodes into PHP source code.
     *
     * @var PrettyPrinter
     */
    private $printer;

    /**
     * Creates the builder together with its AST factory and pretty printer.
     */
    public function __construct()
    {
        $astFactory = new BuilderFactory();
        $sourcePrinter = new PrettyPrinter();

        $this->phpBuilder = $astFactory;
        $this->printer = $sourcePrinter;
    }
42
43
    /**
     * Parses data/UnicodeData.txt and dumps one range set file per property value found in it.
     *
     * Every line is split on ';': field 0 is the hexadecimal code point, field 1 the character
     * name and field 2 the property value the ranges are grouped by. Consecutive lines sharing
     * the same property value are collapsed into a single Range; pairs of lines named
     * "<..., First>" / "<..., Last>" are turned into one Range spanning both code points.
     *
     * Progress is echoed to the standard output while parsing.
     *
     * @param array $index Existing map of property name to generated file path.
     * @return array The index extended with an entry for every dumped property.
     * @throws UniLexException
     * @throws ReflectionException
     * @throws RuntimeException If the data file cannot be read or is malformed.
     */
    public function buildUnicodeData(array $index): array
    {
        $source = new SplFileObject(__DIR__ . '/../../data/UnicodeData.txt');
        $charCounter = 0;
        echo "Parsing: ";
        $ranges = [];
        $lastCode = null;
        $lastProp = null;
        // Start of the run of ordinary lines being accumulated; null when no run is open.
        $rangeStart = null;
        // Code points of "<..., First>" lines, keyed by range name.
        $namedStarts = [];
        while (!$source->eof()) {
            $line = $source->fgets();
            if (false === $line) {
                throw new RuntimeException("Error reading line from unicode data file");
            }
            if ('' === $line) {
                continue;
            }
            $splitLine = explode(';', $line);
            $codeHex = $splitLine[0] ?? null;
            $name = $splitLine[1] ?? null;
            $prop = $splitLine[2] ?? null;
            if (!isset($codeHex, $name, $prop)) {
                throw new RuntimeException("Invalid line format");
            }
            // hexdec() is declared as int|float; Range only accepts integers under strict types.
            $code = (int) hexdec($codeHex);
            $isFirst = 1 === preg_match('#^<(.+), First>$#', $name, $matches);
            $firstName = $matches[1] ?? null;
            $isLast = 1 === preg_match('#^<(.+), Last>$#', $name, $matches);
            $lastName = $matches[1] ?? null;

            if ($isFirst) {
                // Emit the run that ends right before the named range, otherwise it would be lost.
                if (isset($rangeStart, $lastCode, $lastProp)) {
                    $ranges[$lastProp][] = new Range($rangeStart, $lastCode);
                }
                $namedStarts[$firstName] = $code;
                $rangeStart = null;
            } elseif ($isLast) {
                // A "Last" line is only valid immediately after its matching "First" line.
                if (!isset($namedStarts[$lastName]) || isset($rangeStart) || $lastCode !== $namedStarts[$lastName]) {
                    throw new RuntimeException("Invalid file format");
                }
                // $lastProp is the property of the preceding "First" line.
                $ranges[$lastProp][] = new Range($lastCode, $code);
            } elseif (!isset($rangeStart)) {
                // No run is open (start of file or right after a named range): open one here,
                // even when the property equals the one of the preceding named range.
                $rangeStart = $code;
            } elseif ($prop !== $lastProp) {
                // Property changed: close the previous run and open a new one.
                // NOTE(review): runs are split on property change only, so code points missing
                // from the file between two lines of the same property end up inside the range
                // - confirm this is intended.
                $ranges[$lastProp][] = new Range($rangeStart, $lastCode);
                $rangeStart = $code;
            }

            $lastCode = $code;
            $lastProp = $prop;

            // One progress dot per 100 parsed lines.
            if ($charCounter % 100 === 0) {
                echo ".";
            }
            $charCounter++;
        }
        // Emit the run that is still open when the file ends.
        if (isset($rangeStart, $lastCode, $lastProp)) {
            $ranges[$lastProp][] = new Range($rangeStart, $lastCode);
        }
        // Drop the file object before the (potentially long) dump starts.
        $source = null;
        echo " {$charCounter} characters\n";

        return $this->dumpProps($index, $this->buildRangeSets($ranges));
    }
118
119
    /**
     * Parses data/Scripts.txt and dumps one range set file per script found in it.
     *
     * Everything after '#' on a line is treated as a comment. The remaining data is expected
     * to look like "START[..FINISH] ; Property", with hexadecimal code points. Code points
     * that fall into gaps between parsed ranges are collected under the 'Unknown' property.
     *
     * Progress is echoed to the standard output while parsing.
     *
     * @param array $index Existing map of property name to generated file path.
     * @return array The index extended with an entry for every dumped property.
     * @throws UniLexException
     * @throws ReflectionException
     * @throws RuntimeException If the data file cannot be read or a line is malformed.
     */
    public function buildScripts(array $index): array
    {
        /** @var Range[][] $ranges */
        $ranges = [];
        $source = new SplFileObject(__DIR__ . '/../../data/Scripts.txt');
        $lastKnownCode = null;
        $unknownRanges = [];

        echo "Parsing: ";
        $rangeCount = 0;

        while (!$source->eof()) {
            $line = $source->fgets();
            if (false === $line) {
                throw new RuntimeException("Error reading line from scripts file");
            }
            $dataWithComment = explode('#', $line, 2);
            $data = trim($dataWithComment[0] ?? '');
            if ('' === $data) {
                continue;
            }
            $rangeWithProp = explode(';', $data);
            // Validate before trimming: trim(null) is a TypeError under strict types, which
            // would hide the intended "invalid line" error.
            if (!isset($rangeWithProp[0], $rangeWithProp[1])) {
                throw new RuntimeException("Invalid range or property");
            }
            $unsplittedRange = trim($rangeWithProp[0]);
            $prop = trim($rangeWithProp[1]);

            $splittedRange = explode('..', $unsplittedRange);
            // hexdec() is declared as int|float; Range only accepts integers under strict types.
            $start = (int) hexdec($splittedRange[0]);
            $finish = isset($splittedRange[1])
                ? (int) hexdec($splittedRange[1])
                : $start;

            // NOTE(review): the gap detection below compares each range only with the one
            // parsed right before it, so it presumably expects the file to be ordered by
            // code point - confirm against the actual data file.
            if (!isset($lastKnownCode)) {
                if ($start > 0) {
                    $unknownRanges[] = new Range(0, $start - 1);
                }
            } elseif ($start - $lastKnownCode > 1) {
                $unknownRanges[] = new Range($lastKnownCode + 1, $start - 1);
            }
            $lastKnownCode = $finish;

            $ranges[$prop][] = new Range($start, $finish);
            echo ".";
            $rangeCount++;
        }
        // Drop the file object before the (potentially long) dump starts.
        $source = null;
        $ranges['Unknown'] = $unknownRanges;
        echo ". {$rangeCount} ranges\n";

        return $this->dumpProps($index, $this->buildRangeSets($ranges));
    }
180
181
    /**
     * Converts every list of ranges into a RangeSet, keeping the property names as keys.
     *
     * Echoes one dot per created range set, followed by the total number of sets.
     *
     * @param array $ranges Map of property name to list of Range objects.
     * @return array Map of property name to RangeSet.
     * @throws UniLexException
     */
    private function buildRangeSets(array $ranges): array
    {
        echo "Building range sets: ";
        $setsByProp = [];
        foreach ($ranges as $propName => $propRanges) {
            $setsByProp[$propName] = new RangeSet(...$propRanges);
            echo ".";
        }
        // Exactly one set is stored per iterated key, so the array size equals the loop count.
        $total = count($setsByProp);
        echo " {$total} range sets\n";

        return $setsByProp;
    }
200
201
    /**
     * Writes one generated PHP file per range set and registers it in the index.
     *
     * The file for property P is written to __DIR__ . self::PROP_DIR . "/P.php". It contains
     * a strict_types declaration, the "Properties" sub-namespace of this namespace, imports
     * of Range and RangeSet, and a return statement calling RangeSet::loadUnsafe() with one
     * `new Range(...)` argument per range of the set.
     *
     * @param array $index     Existing map of property name to file path relative to this directory.
     * @param array $rangeSets Map of property name to RangeSet.
     * @return array The index with an entry added (or replaced) for every dumped property.
     * @throws ReflectionException
     * @throws RuntimeException If a generated file cannot be written.
     */
    private function dumpProps(array $index, array $rangeSets): array
    {
        $rangeSetClass = new ReflectionClass(RangeSet::class);
        $rangeClass = new ReflectionClass(Range::class);
        foreach ($rangeSets as $prop => $rangeSet) {
            $targetFile = self::PROP_DIR . "/{$prop}.php";

            $phpNodes = [];
            $declare = new Declare_([new DeclareDeclare('strict_types', $this->phpBuilder->val(1))]);
            $declare->setDocComment(new Doc('/** @noinspection PhpUnhandledExceptionInspection */'));
            $phpNodes[] = $declare;
            $phpNodes[] = $this->phpBuilder->namespace(__NAMESPACE__ . '\\Properties')->getNode();
            $phpNodes[] = $this->phpBuilder->use($rangeClass->getName())->getNode();
            $phpNodes[] = $this->phpBuilder->use($rangeSetClass->getName())->getNode();
            $phpRanges = [];

            foreach ($rangeSet->getRanges() as $range) {
                $rangeStart = $range->getStart();
                $rangeFinish = $range->getFinish();
                $phpRangeStart = $this->phpBuilder->val($rangeStart);
                // Presumably makes the printer emit the literal in hexadecimal notation.
                $phpRangeStart->setAttribute('kind', LNumber::KIND_HEX);
                $phpRangeArgs = [$phpRangeStart];
                // Single-code-point ranges are generated with the start argument only.
                if ($rangeStart != $rangeFinish) {
                    $phpRangeFinish = $this->phpBuilder->val($rangeFinish);
                    $phpRangeFinish->setAttribute('kind', LNumber::KIND_HEX);
                    $phpRangeArgs[] = $phpRangeFinish;
                }
                $phpRanges[] = $this->phpBuilder->new($rangeClass->getShortName(), $phpRangeArgs);
            }
            $phpReturn = new Return_(
                $this->phpBuilder->staticCall($rangeSetClass->getShortName(), 'loadUnsafe', $phpRanges)
            );
            $phpReturn->setDocComment(new Doc('/** phpcs:disable Generic.Files.LineLength.TooLong */'));
            $phpNodes[] = $phpReturn;

            $targetPath = __DIR__ . $targetFile;
            // Do not register a file in the index unless it has actually been written.
            if (false === file_put_contents($targetPath, $this->printer->prettyPrintFile($phpNodes))) {
                throw new RuntimeException("Failed to write property file: {$targetPath}");
            }
            $index[$prop] = $targetFile;
        }

        return $index;
    }
247
248
    /**
     * Writes the property index to PropertyIndex.php next to this file.
     *
     * The generated file is a PHP script that returns the given array, exported with
     * var_export().
     *
     * @param array $index Map to be exported.
     * @throws RuntimeException If the index file cannot be written.
     */
    public function dumpIndex(array $index): void
    {
        $indexCode = "<?php\n\nreturn " . var_export($index, true) . ";\n";
        $indexPath = __DIR__ . '/PropertyIndex.php';
        // file_put_contents() reports failure by returning false; surface it instead of ignoring it.
        if (false === file_put_contents($indexPath, $indexCode)) {
            throw new RuntimeException("Failed to write property index: {$indexPath}");
        }
    }
253
}
254