UnicodeDataRangeIterator::fetchUnicodeDataRange()   B
last analyzed

Complexity

Conditions 9
Paths 6

Size

Total Lines 35
Code Lines 20

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 9
eloc 20
c 1
b 0
f 0
nc 6
nop 1
dl 0
loc 35
rs 8.0555
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Remorhaz\UCD\Tool;
6
7
use Iterator;
8
use IteratorAggregate;
9
use Remorhaz\IntRangeSets\Range;
10
use Remorhaz\IntRangeSets\RangeInterface;
11
use SplFileObject;
12
use Throwable;
13
14
use function strlen;
15
16
/**
 * Reads a semicolon-separated code point listing line by line and yields
 * ranges of code points keyed by the value of the third field of the lines
 * they were built from.
 *
 * Two kinds of ranges are produced:
 *  - runs of consecutive code points whose third field is identical;
 *  - explicit ranges declared by a pair of lines named "<X, First>" and
 *    "<X, Last>".
 *
 * NOTE(review): the class name suggests the input is UnicodeData.txt and the
 * third field is General_Category - confirm against the caller.
 */
final class UnicodeDataRangeIterator implements IteratorAggregate
{
    /** @var SplFileObject Source the lines are read from. */
    private $file;

    /** @var callable Invoked with the byte length of every processed line. */
    private $onProgress;

    /** @var int|null Code point parsed from the current line. */
    private $code;

    /** @var string|null Name field (second field) of the current line. */
    private $name;

    /** @var string|null Third field of the current line. */
    private $prop;

    /** @var int|null Code point of the previously processed line. */
    private $lastCode;

    /** @var string|null Third field of the previously processed line. */
    private $lastProp;

    /** @var int|null First code point of the run currently being collected. */
    private $rangeStart;

    /** @var array<string, int> Code points of "<X, First>" lines, keyed by X. */
    private $namedStarts = [];

    /**
     * @param SplFileObject $file       File to read lines from.
     * @param callable      $onProgress Called as $onProgress(int $bytes) after each processed line.
     */
    public function __construct(SplFileObject $file, callable $onProgress)
    {
        $this->file = $file;
        $this->onProgress = $onProgress;
    }

    /**
     * Lazily walks the file and yields `property => range` pairs.
     *
     * The key is the property of the line preceding the one that closed the
     * range; all lines of a run share that property.
     *
     * @return Iterator
     * @throws Exception\LineNotReadException
     * @throws Exception\InvalidLineException
     * @throws Exception\CodePointNameNotParsedException
     * @throws Exception\RangeNotCreatedException
     */
    public function getIterator(): Iterator
    {
        while (!$this->file->eof()) {
            $line = $this->fetchNextLine($this->file);
            if (null === $line) {
                continue;
            }

            $range = $this->fetchUnicodeDataRange($line);
            if (null !== $range) {
                // $lastProp is still the property of the previous line here,
                // i.e. the property of the range that has just been closed.
                yield $this->lastProp => $range;
            }

            $this->lastCode = $this->code;
            $this->lastProp = $this->prop;

            ($this->onProgress)(strlen($line));
        }

        // A run that is still open when the input ends has no following line
        // to close it, so it must be emitted explicitly.
        $tail = $this->closePendingRange();
        if (null !== $tail) {
            yield $this->lastProp => $tail;
        }
    }

    /**
     * Reads one line from the file.
     *
     * @return string|null The line as returned by fgets(), or null when it is an empty string.
     * @throws Exception\LineNotReadException When fgets() reports a failure.
     */
    private function fetchNextLine(SplFileObject $file): ?string
    {
        $line = $file->fgets();
        if (false === $line) {
            throw new Exception\LineNotReadException($file->getFilename());
        }

        return '' === $line ? null : $line;
    }

    /**
     * Splits a line on ';' and stores its first three fields in
     * $code (hexadecimal, converted to a number), $name and $prop.
     *
     * @throws Exception\InvalidLineException When the line has fewer than three fields.
     */
    private function parseUnicodeDataLine(string $line): void
    {
        $fields = explode(';', $line);
        $codeHex = $fields[0] ?? null;
        $name = $fields[1] ?? null;
        $prop = $fields[2] ?? null;
        if (!isset($codeHex, $name, $prop)) {
            throw new Exception\InvalidLineException($line);
        }

        $this->code = hexdec($codeHex);
        $this->name = $name;
        $this->prop = $prop;
    }

    /**
     * Parses a line and returns the range that this line completes, if any.
     *
     * - "<X, First>": remembers the start of the named range and returns the
     *   run that was being collected before it (or null).
     * - "<X, Last>": returns the named range from the matching "First" line
     *   to this line; the "First" line must be the directly preceding one.
     * - ordinary line: returns null while it extends the current run,
     *   otherwise returns the finished run (or null) and starts a new one.
     *
     * @throws Exception\InvalidLineException
     * @throws Exception\CodePointNameNotParsedException
     * @throws Exception\RangeNotCreatedException
     */
    private function fetchUnicodeDataRange(string $line): ?RangeInterface
    {
        $this->parseUnicodeDataLine($line);

        [$firstName, $lastName] = $this->parseRangeBoundary($this->name);
        if (isset($firstName)) {
            // Emit the run collected so far instead of silently dropping it.
            $pending = $this->closePendingRange();
            $this->namedStarts[$firstName] = $this->code;

            return $pending;
        }

        if (isset($lastName)) {
            $hasMatchingStart = isset($this->namedStarts[$lastName])
                && !isset($this->rangeStart)
                && $this->lastCode === $this->namedStarts[$lastName];
            if (!$hasMatchingStart) {
                throw new Exception\InvalidLineException($line);
            }

            return $this->createRange($this->lastCode, $this->code);
        }

        // A line can only extend a run that is actually open. Right after a
        // "<X, Last>" line no run is open, so a line with the same property
        // and the next code must start a new run rather than be skipped.
        $continuesRun = isset($this->rangeStart)
            && $this->prop === $this->lastProp
            && $this->code - 1 === $this->lastCode;
        if ($continuesRun) {
            return null;
        }

        $finished = $this->closePendingRange();
        $this->rangeStart = $this->code;

        return $finished;
    }

    /**
     * Builds the range for the run currently being collected (from
     * $rangeStart to $lastCode) and marks the run as closed.
     *
     * @return RangeInterface|null Null when no run is open.
     * @throws Exception\RangeNotCreatedException
     */
    private function closePendingRange(): ?RangeInterface
    {
        if (!isset($this->rangeStart, $this->lastCode)) {
            $this->rangeStart = null;

            return null;
        }

        $range = $this->createRange($this->rangeStart, $this->lastCode);
        $this->rangeStart = null;

        return $range;
    }

    /**
     * Detects "<X, First>" / "<X, Last>" names.
     *
     * @return array{0: string|null, 1: string|null} [X, null] for a "First"
     *         name, [null, X] for a "Last" name, [null, null] otherwise.
     * @throws Exception\CodePointNameNotParsedException When matching fails.
     */
    private function parseRangeBoundary(string $name): array
    {
        try {
            if (1 === \Safe\preg_match('#^<(.+), First>$#', $name, $matches)) {
                return [$matches[1] ?? null, null];
            }

            if (1 === \Safe\preg_match('#^<(.+), Last>$#', $name, $matches)) {
                return [null, $matches[1] ?? null];
            }

            return [null, null];
        } catch (Throwable $e) {
            throw new Exception\CodePointNameNotParsedException($name, $e);
        }
    }

    /**
     * Creates a range object, converting any failure into a domain exception.
     *
     * @throws Exception\RangeNotCreatedException
     */
    private function createRange(int $start, ?int $finish): RangeInterface
    {
        try {
            return new Range($start, $finish);
        } catch (Throwable $e) {
            throw new Exception\RangeNotCreatedException($e);
        }
    }
}
150