1 | <?php |
||
27 | class Pinyin |
||
28 | { |
||
29 | /** |
||
30 | * Dictionary: first character => (word => formatted pinyin), as filled by appends(). |
||
31 | * |
||
32 | * @var array |
||
33 | */ |
||
34 | protected static $dictionary = array(); |
||
35 |||
36 | /** |
||
37 | * Global settings with their initial values. |
||
38 | * |
||
39 | * @var array |
||
40 | */ |
||
41 | protected static $settings = array( |
||
42 | 'delimiter' => ' ', |
||
43 | 'accent' => true, |
||
44 | 'only_chinese' => false, |
||
45 | 'uppercase' => false, |
||
46 | 'charset' => 'UTF-8' // GB2312,UTF-8 |
||
47 | ); |
||
48 | /** |
||
49 | * Internal charset passed to the mb_* string functions in this class. |
||
50 | * |
||
51 | * @var string |
||
52 | */ |
||
53 | protected static $internalCharset = 'UTF-8'; |
||
54 |||
55 | /** |
||
56 | * The shared instance created and returned by getInstance(). |
||
57 | * |
||
58 | * @var \Overtrue\Pinyin\Pinyin |
||
59 | */ |
||
60 | private static $_instance; |
||
61 | |||
/**
 * Constructor.
 *
 * Loads the bundled dictionary (JSON stored in data/dict.php next to this
 * source directory) into the shared dictionary, unless it is already filled.
 *
 * @throws \RuntimeException when the dictionary file cannot be read or does
 *                           not decode to an array.
 */
private function __construct()
{
    if (!empty(static::$dictionary)) {
        return;
    }

    $path = dirname(__DIR__).'/data/dict.php';

    $contents = file_get_contents($path);
    if (false === $contents) {
        throw new \RuntimeException("Unable to read pinyin dictionary file: {$path}");
    }

    // Fail with a clear message here instead of letting a null/scalar reach
    // appends(), which would die on its array type-hint.
    $list = json_decode($contents, true);
    if (!is_array($list)) {
        throw new \RuntimeException("Pinyin dictionary file is not a valid JSON map: {$path}");
    }

    static::appends($list);
}
|
74 | |||
75 | /** |
||
76 | * Disable clone. |
||
77 | */ |
||
78 | private function __clone() |
||
81 | |||
/**
 * Return the shared instance, creating it on the first call.
 *
 * @return \Overtrue\Pinyin\Pinyin
 */
public static function getInstance()
{
    if (null !== self::$_instance) {
        return self::$_instance;
    }

    self::$_instance = new static();

    return self::$_instance;
}
||
95 | |||
96 | /** |
||
97 | * Setter. |
||
98 | * |
||
99 | * @param string $key |
||
100 | * @param mixed $value |
||
101 | */ |
||
102 | 3 | public static function set($key, $value) |
|
106 | |||
107 | /** |
||
108 | * Global settings. |
||
109 | * |
||
110 | * @param array $settings settings. |
||
111 | */ |
||
112 | public static function settings(array $settings = array()) |
||
116 | |||
117 | /** |
||
118 | * Chinese to pinyin. |
||
119 | * |
||
120 | * @param string $string source string. |
||
121 | * @param array $settings settings. |
||
122 | * |
||
123 | * @return string |
||
124 | */ |
||
125 | 12 | public static function trans($string, array $settings = array()) |
|
131 | |||
132 | /** |
||
133 | * Get first letters of string. |
||
134 | * |
||
135 | * @param string $string source string. |
||
136 | * @param array $settings settings. |
||
137 | * |
||
138 | * @return string |
||
139 | */ |
||
140 | 2 | public static function letter($string, array $settings = array()) |
|
148 | |||
149 | /** |
||
150 | * Parse the string to pinyin. |
||
151 | * |
||
152 | * Overtrue\Pinyin\Pinyin::parse('带着梦想旅行'); |
||
153 | * |
||
154 | * @param string $string |
||
155 | * @param array $settings |
||
156 | * |
||
157 | * @return array |
||
158 | */ |
||
159 | 14 | public static function parse($string, array $settings = array()) |
|
196 | |||
/**
 * Register custom words in the dictionary.
 *
 * Every word is normalised through formatWords() and stored under its
 * first character, so lookups can be done per character.
 *
 * @param array $appends map of word => pinyin.
 */
public static function appends(array $appends)
{
    foreach (static::formatWords($appends) as $word => $pinyin) {
        $initial = mb_substr($word, 0, 1, static::$internalCharset);

        self::$dictionary[$initial][$word] = $pinyin;
    }
}
||
210 | |||
/**
 * Get first letters from pinyin.
 *
 * Splits the pinyin on single spaces and keeps the first byte of every word
 * that starts with an ASCII letter; other words are skipped.
 *
 * @param string $pinyin   space-separated pinyin words.
 * @param array  $settings must contain 'uppercase' (bool) and 'delimiter' (string).
 *
 * @return string the initials joined with the configured delimiter.
 */
protected function getFirstLetters($pinyin, $settings)
{
    $letterCase = $settings['uppercase'] ? 'strtoupper' : 'strtolower';

    $letters = array();

    foreach (explode(' ', $pinyin) as $word) {
        if (empty($word)) {
            continue;
        }

        // Square-bracket offset: the `$word{0}` form is deprecated in
        // PHP 7.4 and a parse error in PHP 8.
        $first = $word[0];
        $ord = ord(strtolower($first));

        // Only a-z (after lowercasing) counts as a letter.
        if ($ord >= 97 && $ord <= 122) {
            $letters[] = $letterCase($first);
        }
    }

    return implode($settings['delimiter'], $letters);
}
||
239 | |||
/**
 * Replace string to pinyin.
 *
 * The string is first passed through prepare(). For every character of the
 * prepared string the dictionary group stored under that character is
 * collected, and the combined word => pinyin map is applied with strtr().
 *
 * @param string $string source string.
 *
 * @return string the converted string, trimmed and with doubled spaces collapsed.
 */
protected function string2pinyin($string)
{
    $preparedString = $this->prepare($string);
    $length = mb_strlen($preparedString, static::$internalCharset);
    $replacements = array();

    for ($i = 0; $i < $length; ++$i) {
        $char = mb_substr($preparedString, $i, 1, static::$internalCharset);

        if (isset(self::$dictionary[$char])) {
            // Array union instead of array_merge(): it keeps numeric keys
            // intact (array_merge renumbers them) and does not rebuild the
            // accumulated map on every character. Groups are keyed by first
            // character, so their keys never conflict.
            $replacements += self::$dictionary[$char];
        }
    }

    $pinyin = strtr($preparedString, $replacements);

    // Dictionary entries are padded with a space on both sides, so adjacent
    // replacements leave two spaces between words; collapse them to one.
    return trim(str_replace('  ', ' ', $pinyin));
}
|
264 | |||
/**
 * Normalise the pinyin of every entry in a word list.
 *
 * Keys (the words) are kept as they are; each value is passed through
 * formatDictPinyin().
 *
 * @param array $words map of word => pinyin.
 *
 * @return array the same map with formatted pinyin values.
 */
public static function formatWords($words)
{
    foreach ($words as $entry => $rawPinyin) {
        $formatted = static::formatDictPinyin($rawPinyin);
        $words[$entry] = $formatted;
    }

    return $words;
}
||
280 | |||
/**
 * Lowercase the numbered pinyin syllables of a dictionary entry.
 *
 * The pinyin is trimmed, wrapped in one leading and one trailing space, and
 * every "letters + optional colon + tone digit" syllable is lowercased.
 * Text that does not match that shape is left untouched.
 *
 * @param string $pinyin pinyin string.
 *
 * @return string
 */
protected static function formatDictPinyin($pinyin)
{
    $padded = ' '.trim($pinyin).' ';

    $lowercase = function ($matches) {
        return strtolower($matches[0]);
    };

    return preg_replace_callback('/[a-z]{1,}:?\d{1}\s?/i', $lowercase, $padded);
}
||
296 | |||
/**
 * Tell whether the string contains at least one Han character.
 *
 * @param string $string string to check.
 *
 * @return int result of preg_match(): 1 when a Han character is found, 0 otherwise.
 */
protected function containChinese($string)
{
    $found = preg_match('/\p{Han}+/u', $string);

    return $found;
}
||
308 | |||
/**
 * Strip every character that is not a Han character.
 *
 * @param string $string source string.
 *
 * @return string the Han characters of the input, in their original order.
 */
public function justChinese($string)
{
    $hanOnly = preg_replace('/[^\p{Han}]/u', '', $string);

    return $hanOnly;
}
||
320 | |||
/**
 * Prepare the string for conversion.
 *
 * Inserts a backslash between a letter and a digit that directly follows
 * it (test4 => test\4), presumably so that digits of the source text are
 * not confused with pinyin tone numbers later on; confirm against callers.
 *
 * NOTE(review): the range A-z also covers the six ASCII punctuation
 * characters between "Z" and "a" (such as "_" and "^"); confirm that this
 * is intended.
 *
 * @param string $string source string.
 *
 * @return string
 */
protected function prepare($string)
{
    $search = array('/([A-z])(\d)/');
    $replace = array('$1\\\\\2'); // letter, literal backslash, digit

    return preg_replace($search, $replace, $string);
}
||
336 | |||
337 | 5 | /** |
|
338 | * Add delimiter. |
||
339 | * |
||
340 | 5 | * @param string $string |
|
341 | 5 | */ |
|
342 | 5 | protected function delimit($string, $delimiter = '') |
|
346 | |||
347 | /** |
||
348 | * Remove tone. |
||
349 | * |
||
350 | * @param string $string string with tone. |
||
351 | * |
||
352 | * @return string |
||
353 | */ |
||
354 | protected function removeTone($string) |
||
363 | |||
364 | /** |
||
365 | * Credits for these 2 functions go to Bouke Versteegh, who shared these |
||
366 | * at http://stackoverflow.com/questions/1598856/convert-numbered-to-accentuated-pinyin. |
||
367 | * |
||
368 | * @param string $string The pinyin string with tone numbers, i.e. "ni3 hao3" |
||
369 | 11 | * |
|
370 | * @return string The formatted string with tone marks, e.g. "nǐ hǎo" |
||
371 | 11 | */ |
|
372 | protected function addAccents($string) |
||
380 | 1 | ||
381 | /** |
||
382 | 1 | * Helper callback. |
|
383 | 1 | * |
|
384 | 1 | * @param array $match |
|
385 | 1 | */ |
|
386 | 1 | protected function addAccentsCallback($match) |
|
387 | { |
||
388 | static $accentmap = null; |
||
389 | 1 | ||
429 | }//end class |
||
430 | |||
431 |
If a method or function can return multiple different values, and you are not sure that only a single one of them can be received in this context, we recommend adding an additional type check:
If this is a common case that PHP Analyzer should handle natively, please let us know by opening an issue.