GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Push — master ( 4e2b38...4ddb6f )
by Carlos
02:07
created

Converter::convertAsChars()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 19
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 10
nc 4
nop 2
dl 0
loc 19
rs 9.9332
c 0
b 0
f 0
1
<?php
2
3
namespace Overtrue\Pinyin;
4
5
/**
 * Fluent converter that turns a string containing Chinese text into pinyin.
 *
 * Configure an instance through the chainable option methods (each returns
 * the same instance), then call {@see Converter::convert()}.
 */
class Converter
{
    /** Number of word dictionary files (`words-0.php` .. `words-9.php`). */
    private const SEGMENTS_COUNT = 10;

    /** sprintf() pattern for a word dictionary file; `%s` is the segment index. */
    private const WORDS_PATH = __DIR__.'/../data/words-%s.php';

    /** File returning the single-character map (character => list of readings). */
    private const CHARS_PATH = __DIR__.'/../data/chars.php';

    /** File returning the surname map (surname => pinyin). */
    private const SURNAMES_PATH = __DIR__.'/../data/surnames.php';

    public const TONE_STYLE_DEFAULT = 'default';
    public const TONE_STYLE_NUMBER = 'number';
    public const TONE_STYLE_NONE = 'none';

    /**
     * Character-class fragments that callers may switch off individually
     * (see noAlpha(), noNumber(), noPunctuation()).
     */
    public const REGEXPS = [
        'number' => '0-9',
        'alphabet' => 'a-zA-Z',
        // Chinese characters, without punctuation
        'hans' => '\x{3007}\x{2E80}-\x{2FFF}\x{3100}-\x{312F}\x{31A0}-\x{31EF}\x{3400}-\x{4DBF}\x{4E00}-\x{9FFF}\x{F900}-\x{FAFF}',
        // Punctuation: !"#$%&'()*+,-./:;<=>?@[\]^_{|}~`
        'punctuation' => '\p{P}',
    ];

    /** When true, convert() returns every reading of each character. */
    protected bool $polyphonic = false;

    /** When true, convert() first replaces a leading surname with its pinyin. */
    protected bool $asSurname = false;

    /** When true, convert() skips the word dictionaries and works per character. */
    protected bool $noWords = false;

    /** Replacement used for the ü family of vowels: 'yu' (default), 'v' or 'u'. */
    protected string $yuTo = 'yu';

    /** One of the TONE_STYLE_* constants. */
    protected string $toneStyle = self::TONE_STYLE_DEFAULT;

    /**
     * Character-class fragments of everything convert() keeps; any character
     * matching none of them is removed from the input. The entries below are
     * always kept, the ones from REGEXPS are merged in by the constructor.
     *
     * @var array<string, string>
     */
    protected array $regexps = [
        'separator' => '\p{Z}',
        'mark' => '\p{M}',
        'tab' => "\t",
    ];

    public function __construct()
    {
        $this->regexps = \array_merge($this->regexps, self::REGEXPS);
    }

    /**
     * Static factory, convenient for starting a fluent chain.
     */
    public static function make(): static
    {
        return new static();
    }

    /**
     * Return all readings of each character instead of a single one.
     */
    public function polyphonic(): static
    {
        $this->polyphonic = true;

        return $this;
    }

    /**
     * Treat the beginning of the input as a surname.
     */
    public function surname(): static
    {
        $this->asSurname = true;

        return $this;
    }

    /**
     * Skip the word dictionaries and convert character by character.
     */
    public function noWords(): static
    {
        $this->noWords = true;

        return $this;
    }

    /**
     * Keep Chinese characters but drop latin letters, digits and punctuation.
     */
    public function onlyHans(): static
    {
        // Chinese characters only, no punctuation
        $this->regexps['hans'] = self::REGEXPS['hans'];

        return $this->noAlpha()->noNumber()->noPunctuation();
    }

    /**
     * Drop latin letters from the input.
     */
    public function noAlpha(): static
    {
        unset($this->regexps['alphabet']);

        return $this;
    }

    /**
     * Drop ASCII digits from the input.
     */
    public function noNumber(): static
    {
        unset($this->regexps['number']);

        return $this;
    }

    /**
     * Drop punctuation from the input.
     */
    public function noPunctuation(): static
    {
        unset($this->regexps['punctuation']);

        return $this;
    }

    /**
     * Set the tone style; the value is stored as given, without validation.
     *
     * @param string $toneStyle Normally one of the TONE_STYLE_* constants.
     */
    public function withToneStyle(string $toneStyle): static
    {
        $this->toneStyle = $toneStyle;

        return $this;
    }

    /**
     * Shortcut for withToneStyle(self::TONE_STYLE_NONE).
     */
    public function noTone(): static
    {
        $this->toneStyle = self::TONE_STYLE_NONE;

        return $this;
    }

    /**
     * Shortcut for withToneStyle(self::TONE_STYLE_NUMBER).
     */
    public function useNumberTone(): static
    {
        $this->toneStyle = self::TONE_STYLE_NUMBER;

        return $this;
    }

    /**
     * Write the ü family of vowels as "v".
     */
    public function yuToV(): static
    {
        $this->yuTo = 'v';

        return $this;
    }

    /**
     * Write the ü family of vowels as "u".
     */
    public function yuToU(): static
    {
        $this->yuTo = 'u';

        return $this;
    }

    /**
     * Invoke $callback with this instance when $condition is true.
     *
     * The callback's return value is ignored; the chain always continues
     * with this instance.
     */
    public function when(bool $condition, callable $callback): static
    {
        if ($condition) {
            $callback($this);
        }

        return $this;
    }

    /**
     * Convert $string to pinyin according to the configured options.
     *
     * @param string        $string      Text to convert.
     * @param callable|null $beforeSplit Optional hook: receives the fully
     *                                   substituted string and returns the
     *                                   string that is then split on
     *                                   whitespace. Not used in polyphonic
     *                                   or no-words mode.
     */
    public function convert(string $string, ?callable $beforeSplit = null): Collection
    {
        // Prefix every pre-existing run of letters/digits with a tab so it stays
        // separated from the Chinese text and is not mistaken for a tone number.
        $string = \preg_replace_callback(
            '~[a-z0-9_-]+~i',
            static fn (array $matches): string => "\t" . $matches[0],
            $string
        );

        // Remove every character that is not in one of the retained classes.
        $string = \preg_replace(\sprintf('~[^%s]~u', \implode($this->regexps)), '', $string);

        // Polyphonic mode: all readings, per character.
        if ($this->polyphonic) {
            return $this->convertAsChars($string, true);
        }

        if ($this->noWords) {
            return $this->convertAsChars($string);
        }

        // Replace a leading surname with its dedicated reading.
        if ($this->asSurname) {
            $string = $this->convertSurname($string);
        }

        // Apply each word dictionary segment in turn.
        for ($segment = 0; $segment < self::SEGMENTS_COUNT; $segment++) {
            $string = \strtr($string, require \sprintf(self::WORDS_PATH, $segment));
        }

        return $this->split($beforeSplit ? $beforeSplit($string) : $string);
    }

    /**
     * Convert character by character, without the word dictionaries.
     *
     * The result is keyed by character, so a character that occurs several
     * times yields a single entry; characters missing from the map are
     * skipped.
     *
     * @param bool $polyphonic When true each value is the list of all
     *                         readings, otherwise only the first reading.
     */
    public function convertAsChars(string $string, bool $polyphonic = false): Collection
    {
        $map = require self::CHARS_PATH;

        // Split into individual code points; preg_split() returns false on
        // malformed UTF-8, which is treated as "no characters".
        $chars = \preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) ?: [];

        $items = [];
        foreach ($chars as $char) {
            if (!isset($map[$char])) {
                continue;
            }

            $items[$char] = $polyphonic
                ? \array_map(fn ($pinyin) => $this->formatTone($pinyin, $this->toneStyle), $map[$char])
                : $this->formatTone($map[$char][0], $this->toneStyle);
        }

        return new Collection($items);
    }

    /**
     * Replace a leading surname in $name with its pinyin.
     *
     * The first entry of the surname map that prefixes $name wins; when none
     * matches, $name is returned unchanged.
     */
    protected function convertSurname(string $name): string
    {
        // Loaded once and reused by later calls.
        static $surnames = null;
        $surnames ??= require self::SURNAMES_PATH;

        foreach ($surnames as $surname => $pinyin) {
            if (\str_starts_with($name, $surname)) {
                return $pinyin . \mb_substr($name, \mb_strlen($surname));
            }
        }

        return $name;
    }

    /**
     * Split a substituted string on whitespace and apply the tone style to
     * every token.
     */
    protected function split(string $item): Collection
    {
        // PREG_SPLIT_NO_EMPTY discards only empty tokens. A bare array_filter()
        // would also discard the token "0", because "0" is falsy in PHP.
        $tokens = \preg_split('/\s+/', $item, -1, PREG_SPLIT_NO_EMPTY) ?: [];

        $items = [];
        foreach ($tokens as $token) {
            $items[] = $this->formatTone($token, $this->toneStyle);
        }

        return new Collection($items);
    }

    /**
     * Rewrite the tone-marked vowels of one pinyin token for the given style.
     *
     * - Vowels of the ü family (mapped to 'yu' below) are rewritten in every
     *   style, using the configured $yuTo replacement.
     * - All other tone-marked vowels are left untouched in the default style
     *   and reduced to their plain letter otherwise.
     * - In the number style the tone digit of each rewritten vowel is
     *   appended to the end of the token.
     *
     * @param string $pinyin Token to format.
     * @param string $style  One of the TONE_STYLE_* constants.
     */
    protected function formatTone(string $pinyin, string $style): string
    {
        // Marked vowel => [plain replacement, tone number]. The two-character
        // "üe" forms come first so they are handled before the bare "e" forms.
        $replacements = [
            'üē' => ['ue', 1], 'üé' => ['ue', 2], 'üě' => ['ue', 3], 'üè' => ['ue', 4],
            'ā' => ['a', 1], 'ē' => ['e', 1], 'ī' => ['i', 1], 'ō' => ['o', 1], 'ū' => ['u', 1], 'ǖ' => ['yu', 1],
            'á' => ['a', 2], 'é' => ['e', 2], 'í' => ['i', 2], 'ó' => ['o', 2], 'ú' => ['u', 2], 'ǘ' => ['yu', 2],
            'ǎ' => ['a', 3], 'ě' => ['e', 3], 'ǐ' => ['i', 3], 'ǒ' => ['o', 3], 'ǔ' => ['u', 3], 'ǚ' => ['yu', 3],
            'à' => ['a', 4], 'è' => ['e', 4], 'ì' => ['i', 4], 'ò' => ['o', 4], 'ù' => ['u', 4], 'ǜ' => ['yu', 4],
        ];

        foreach ($replacements as $unicode => [$plain, $tone]) {
            if (!\str_contains($pinyin, $unicode)) {
                continue;
            }

            $isYu = $plain === 'yu';

            if (!$isYu && $style === self::TONE_STYLE_DEFAULT) {
                continue;
            }

            // https://zh.wikipedia.org/wiki/%C3%9C
            // Only the ü family is affected by the yuTo option; applying it to
            // every vowel would turn e.g. "lā" into "lv".
            if ($isYu && $this->yuTo !== 'yu') {
                $plain = $this->yuTo;
            }

            $pinyin = \str_replace($unicode, $plain, $pinyin);

            // Honour the style that was passed in rather than the instance state.
            if ($style === self::TONE_STYLE_NUMBER) {
                $pinyin .= $tone;
            }
        }

        return $pinyin;
    }
}
253