GitHub Access Token became invalid

It seems that the GitHub access token used for retrieving details about this repository from GitHub has become invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.
Passed
Push — master ( ee75bb...4e2b38 )
by Carlos
11:23
created

Converter::yuToV()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 2
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 5
rs 10
1
<?php
2
3
namespace Overtrue\Pinyin;
4
5
/**
 * Fluent converter that turns a string containing Chinese characters into a
 * Collection of pinyin tokens.
 *
 * Configure an instance through the chainable methods (each returns the same
 * instance), then call convert().
 */
class Converter
{
    /** Number of `words-N.php` dictionary segments loaded by convert(). */
    private const SEGMENTS_COUNT = 10;

    /** sprintf() template for a dictionary segment; `%s` is the segment index. */
    private const WORDS_PATH = __DIR__.'/../data/words-%s.php';

    // NOTE(review): CHARS_PATH is not referenced anywhere in this class.
    private const CHARS_PATH = __DIR__.'/../data/chars.php';

    /** Replacement map used when polyphonic output is requested. */
    private const CHARS_WITH_POLYPHONES_PATH = __DIR__.'/../data/chars-with-polyphones.php';

    /** Surname => pinyin map used when surname mode is enabled. */
    private const SURNAMES_PATH = __DIR__.'/../data/surnames.php';

    public const TONE_STYLE_DEFAULT = 'default';
    public const TONE_STYLE_NUMBER = 'number';
    public const TONE_STYLE_NONE = 'none';

    protected bool $asPolyphonic = false;

    protected bool $asSurname = false;

    /** Text substituted for a toned "ü" by formatTone(): 'yu', 'v' or 'u'. */
    protected string $yuTo = 'yu';

    protected string $toneStyle = self::TONE_STYLE_DEFAULT;

    /**
     * Character-class fragments for everything convert() keeps in its input.
     * The constructor merges self::REGEXPS into this list; the no*() methods
     * remove entries again.
     *
     * @var array<string, string>
     */
    protected array $regexps = [
        'separator' => '\p{Z}',
        'mark' => '\p{M}',
        'tab' => "\t"
    ];

    /**
     * Optional character-class fragments that are kept by default.
     */
    public const REGEXPS = [
        'number' => '0-9',
        'alphabet' => 'a-zA-Z',
        // Chinese characters, without symbols
        'hans' => '\x{3007}\x{2E80}-\x{2FFF}\x{3100}-\x{312F}\x{31A0}-\x{31EF}\x{3400}-\x{4DBF}\x{4E00}-\x{9FFF}\x{F900}-\x{FAFF}',
        // Any character in the Unicode punctuation category
        'punctuation' => '\p{P}',
    ];

    public function __construct()
    {
        $this->regexps = \array_merge($this->regexps, self::REGEXPS);
    }

    /**
     * Static factory, convenient for starting a fluent chain.
     */
    public static function make(): static
    {
        return new static();
    }

    /**
     * Make convert() use the polyphone character map instead of the word
     * dictionaries.
     */
    public function asPolyphonic(): static
    {
        $this->asPolyphonic = true;

        return $this;
    }

    /**
     * Make convert() translate a leading surname through the surname map
     * before the regular dictionary lookup.
     */
    public function asSurname(): static
    {
        $this->asSurname = true;

        return $this;
    }

    /**
     * Keep Chinese characters but drop letters, digits and punctuation.
     */
    public function onlyHans(): static
    {
        // Chinese characters, without symbols
        $this->regexps['hans'] = self::REGEXPS['hans'];

        return $this->noAlpha()->noNumber()->noPunctuation();
    }

    /**
     * Drop ASCII letters from the input.
     */
    public function noAlpha(): static
    {
        unset($this->regexps['alphabet']);

        return $this;
    }

    /**
     * Drop ASCII digits from the input.
     */
    public function noNumber(): static
    {
        unset($this->regexps['number']);

        return $this;
    }

    /**
     * Drop punctuation from the input.
     */
    public function noPunctuation(): static
    {
        unset($this->regexps['punctuation']);

        return $this;
    }

    /**
     * Select the tone style; formatTone() gives special meaning to the
     * TONE_STYLE_* constants.
     */
    public function withToneStyle(string $toneStyle): static
    {
        $this->toneStyle = $toneStyle;

        return $this;
    }

    /**
     * Shortcut for withToneStyle(self::TONE_STYLE_NONE).
     */
    public function noTone(): static
    {
        $this->toneStyle = self::TONE_STYLE_NONE;

        return $this;
    }

    /**
     * Shortcut for withToneStyle(self::TONE_STYLE_NUMBER).
     */
    public function useNumberTone(): static
    {
        $this->toneStyle = self::TONE_STYLE_NUMBER;

        return $this;
    }

    /**
     * Write a toned "ü" as "v" instead of the default "yu".
     */
    public function yuToV(): static
    {
        $this->yuTo = 'v';

        return $this;
    }

    /**
     * Write a toned "ü" as "u" instead of the default "yu".
     */
    public function yuToU(): static
    {
        $this->yuTo = 'u';

        return $this;
    }

    /**
     * Invoke $callback with this converter when $condition is true.
     *
     * The callback's return value is ignored; the same instance is always
     * returned so the chain can continue.
     */
    public function when(bool $condition, callable $callback): static
    {
        if ($condition) {
            $callback($this);
        }

        return $this;
    }

    /**
     * Convert $string to pinyin tokens.
     *
     * @param string        $string      Text to convert.
     * @param callable|null $beforeSplit Optional hook that receives the fully
     *                                   substituted string and returns the
     *                                   string to tokenize. It is not invoked
     *                                   in polyphonic mode.
     */
    public function convert(string $string, ?callable $beforeSplit = null): Collection
    {
        // Put a tab in front of every run of ASCII letters/digits/_/- so they
        // become separate tokens and digits are not mistaken for tone numbers.
        $string = \preg_replace_callback(
            '~[a-z0-9_-]+~i',
            static fn (array $matches): string => "\t".$matches[0],
            $string
        );

        // Strip every character that is not in one of the retained classes.
        $string = \preg_replace(\sprintf('~[^%s]~u', \implode($this->regexps)), '', $string);

        // Polyphonic mode uses its own character map and returns early.
        if ($this->asPolyphonic) {
            return $this->convertAsPolyphonic($string);
        }

        // Replace a leading surname first.
        if ($this->asSurname) {
            $string = $this->convertSurname($string);
        }

        // Apply each dictionary segment in index order.
        for ($segment = 0; $segment < self::SEGMENTS_COUNT; $segment++) {
            $string = \strtr($string, require \sprintf(self::WORDS_PATH, $segment));
        }

        return $this->split($beforeSplit !== null ? $beforeSplit($string) : $string);
    }

    /**
     * Substitute characters through the polyphone map and tokenize the result.
     */
    protected function convertAsPolyphonic(string $string): Collection
    {
        $string = \strtr($string, require self::CHARS_WITH_POLYPHONES_PATH);

        return $this->split($string);
    }

    /**
     * Replace a surname at the start of $name with its pinyin.
     *
     * The first entry of the surname map that $name starts with wins; the
     * rest of the name is appended untouched. When nothing matches, $name is
     * returned unchanged.
     */
    protected function convertSurname(string $name): string
    {
        // Load the surname map once and reuse it on later calls.
        static $surnames = null;
        $surnames ??= require self::SURNAMES_PATH;

        foreach ($surnames as $surname => $pinyin) {
            if (\str_starts_with($name, $surname)) {
                return $pinyin . \mb_substr($name, \mb_strlen($surname));
            }
        }

        return $name;
    }

    /**
     * Split a whitespace-delimited pinyin string into formatted tokens.
     *
     * Only empty tokens are discarded, so a token such as "0" is kept. The
     * array keys produced by the split are preserved.
     */
    protected function split(string $pinyin): Collection
    {
        $items = \array_filter(
            \preg_split('/\s+/i', $pinyin) ?: [],
            static fn (string $item): bool => $item !== ''
        );

        foreach ($items as $index => $item) {
            $items[$index] = $this->formatTone($item, $this->toneStyle);
        }

        return new Collection($items);
    }

    /**
     * Rewrite the tone-marked vowels of one pinyin token for $style.
     *
     * - TONE_STYLE_DEFAULT leaves tone marks alone, except that a toned "ü"
     *   (ǖ ǘ ǚ ǜ) is still rewritten to the configured "yu" substitute.
     * - Any other style replaces each tone-marked vowel with its plain
     *   letter; TONE_STYLE_NUMBER additionally appends the tone number to
     *   the end of the token.
     *
     * The "yu" substitute (see yuToV()/yuToU()) is applied only to "ü"
     * replacements, never to other vowels.
     */
    protected function formatTone(string $pinyin, string $style): string
    {
        // The two-character "üe" forms are listed first so they are handled
        // before the single-vowel entries for "e".
        $replacements = [
            'üē' => ['ue', 1], 'üé' => ['ue', 2], 'üě' => ['ue', 3], 'üè' => ['ue', 4],
            'ā' => ['a', 1], 'ē' => ['e', 1], 'ī' => ['i', 1], 'ō' => ['o', 1], 'ū' => ['u', 1], 'ǖ' => ['yu', 1],
            'á' => ['a', 2], 'é' => ['e', 2], 'í' => ['i', 2], 'ó' => ['o', 2], 'ú' => ['u', 2], 'ǘ' => ['yu', 2],
            'ǎ' => ['a', 3], 'ě' => ['e', 3], 'ǐ' => ['i', 3], 'ǒ' => ['o', 3], 'ǔ' => ['u', 3], 'ǚ' => ['yu', 3],
            'à' => ['a', 4], 'è' => ['e', 4], 'ì' => ['i', 4], 'ò' => ['o', 4], 'ù' => ['u', 4], 'ǜ' => ['yu', 4],
        ];

        foreach ($replacements as $unicode => [$plain, $tone]) {
            if (!\str_contains($pinyin, $unicode)) {
                continue;
            }

            // In default style only the "ü" letters are rewritten.
            if ($plain !== 'yu' && $style === self::TONE_STYLE_DEFAULT) {
                continue;
            }

            // https://zh.wikipedia.org/wiki/%C3%9C
            if ($plain === 'yu' && $this->yuTo !== 'yu') {
                $plain = $this->yuTo;
            }

            $pinyin = \str_replace($unicode, $plain, $pinyin);

            if ($style === self::TONE_STYLE_NUMBER) {
                $pinyin .= $tone;
            }
        }

        return $pinyin;
    }
}
231