Total Complexity | 159 |
Total Lines | 706 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like Iconv often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Iconv, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
37 | final class Iconv |
||
38 | { |
||
/** Message passed to trigger_error() when input bytes cannot be decoded. */
39 | public const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string'; |
||
/** sprintf() template (input charset, output charset) reported by iconv() when a charset map cannot be loaded. */
40 | public const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed'; |
||
41 | |||
/** Value reported/changed through iconv_get_encoding()/iconv_set_encoding() under 'input_encoding'. */
42 | public static $inputEncoding = 'utf-8'; |
||
/** Value reported/changed through iconv_get_encoding()/iconv_set_encoding() under 'output_encoding'. */
43 | public static $outputEncoding = 'utf-8'; |
||
/** Charset the iconv_* helpers fall back to when their $encoding/$charset argument is null. */
44 | public static $internalEncoding = 'utf-8'; |
||
45 | |||
/**
 * Charset aliases: alternate lowercase spellings mapped to the canonical name.
 * iconv() lowercases charset names and resolves them through this table before loading a conversion map.
 */
46 | private static $alias = [ |
||
47 | 'utf8' => 'utf-8', |
||
48 | 'ascii' => 'us-ascii', |
||
49 | 'tis-620' => 'iso-8859-11', |
||
50 | 'cp1250' => 'windows-1250', |
||
51 | 'cp1251' => 'windows-1251', |
||
52 | 'cp1252' => 'windows-1252', |
||
53 | 'cp1253' => 'windows-1253', |
||
54 | 'cp1254' => 'windows-1254', |
||
55 | 'cp1255' => 'windows-1255', |
||
56 | 'cp1256' => 'windows-1256', |
||
57 | 'cp1257' => 'windows-1257', |
||
58 | 'cp1258' => 'windows-1258', |
||
59 | 'shift-jis' => 'cp932', |
||
60 | 'shift_jis' => 'cp932', |
||
61 | 'latin1' => 'iso-8859-1', |
||
62 | 'latin2' => 'iso-8859-2', |
||
63 | 'latin3' => 'iso-8859-3', |
||
64 | 'latin4' => 'iso-8859-4', |
||
65 | 'latin5' => 'iso-8859-9', |
||
66 | 'latin6' => 'iso-8859-10', |
||
67 | 'latin7' => 'iso-8859-13', |
||
68 | 'latin8' => 'iso-8859-14', |
||
69 | 'latin9' => 'iso-8859-15', |
||
70 | 'latin10' => 'iso-8859-16', |
||
71 | 'iso8859-1' => 'iso-8859-1', |
||
72 | 'iso8859-2' => 'iso-8859-2', |
||
73 | 'iso8859-3' => 'iso-8859-3', |
||
74 | 'iso8859-4' => 'iso-8859-4', |
||
75 | 'iso8859-5' => 'iso-8859-5', |
||
76 | 'iso8859-6' => 'iso-8859-6', |
||
77 | 'iso8859-7' => 'iso-8859-7', |
||
78 | 'iso8859-8' => 'iso-8859-8', |
||
79 | 'iso8859-9' => 'iso-8859-9', |
||
80 | 'iso8859-10' => 'iso-8859-10', |
||
81 | 'iso8859-11' => 'iso-8859-11', |
||
82 | 'iso8859-12' => 'iso-8859-12', |
||
83 | 'iso8859-13' => 'iso-8859-13', |
||
84 | 'iso8859-14' => 'iso-8859-14', |
||
85 | 'iso8859-15' => 'iso-8859-15', |
||
86 | 'iso8859-16' => 'iso-8859-16', |
||
87 | 'iso_8859-1' => 'iso-8859-1', |
||
88 | 'iso_8859-2' => 'iso-8859-2', |
||
89 | 'iso_8859-3' => 'iso-8859-3', |
||
90 | 'iso_8859-4' => 'iso-8859-4', |
||
91 | 'iso_8859-5' => 'iso-8859-5', |
||
92 | 'iso_8859-6' => 'iso-8859-6', |
||
93 | 'iso_8859-7' => 'iso-8859-7', |
||
94 | 'iso_8859-8' => 'iso-8859-8', |
||
95 | 'iso_8859-9' => 'iso-8859-9', |
||
96 | 'iso_8859-10' => 'iso-8859-10', |
||
97 | 'iso_8859-11' => 'iso-8859-11', |
||
98 | 'iso_8859-12' => 'iso-8859-12', |
||
99 | 'iso_8859-13' => 'iso-8859-13', |
||
100 | 'iso_8859-14' => 'iso-8859-14', |
||
101 | 'iso_8859-15' => 'iso-8859-15', |
||
102 | 'iso_8859-16' => 'iso-8859-16', |
||
103 | 'iso88591' => 'iso-8859-1', |
||
104 | 'iso88592' => 'iso-8859-2', |
||
105 | 'iso88593' => 'iso-8859-3', |
||
106 | 'iso88594' => 'iso-8859-4', |
||
107 | 'iso88595' => 'iso-8859-5', |
||
108 | 'iso88596' => 'iso-8859-6', |
||
109 | 'iso88597' => 'iso-8859-7', |
||
110 | 'iso88598' => 'iso-8859-8', |
||
111 | 'iso88599' => 'iso-8859-9', |
||
112 | 'iso885910' => 'iso-8859-10', |
||
113 | 'iso885911' => 'iso-8859-11', |
||
114 | 'iso885912' => 'iso-8859-12', |
||
115 | 'iso885913' => 'iso-8859-13', |
||
116 | 'iso885914' => 'iso-8859-14', |
||
117 | 'iso885915' => 'iso-8859-15', |
||
118 | 'iso885916' => 'iso-8859-16', |
||
119 | ]; |
||
/** Transliteration table; filled on first use by mapFromUtf8() from getData('translit'). */
120 | private static $translitMap = []; |
||
/** Cache of conversion maps already loaded by loadMap(), keyed by "<type><charset>" (e.g. "from.cp932"). */
121 | private static $convertMap = []; |
||
// NOTE(review): $errorHandler is not referenced by any method visible in this excerpt - confirm it is still needed.
122 | private static $errorHandler; |
||
|
|||
// NOTE(review): $lastError is not referenced by any method visible in this excerpt - confirm it is still needed.
123 | private static $lastError; |
||
124 | |||
/** Byte length of a UTF-8 sequence, looked up by (lead byte & "\xF0"); callers default to 1 for any other value. */
125 | private static $ulenMask = ["\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4]; |
||
/** Set by iconv() for the string being converted; read by utf8ToUtf8() and mapFromUtf8() to skip per-character validation. */
126 | private static $isValidUtf8; |
||
127 | |||
/**
 * Converts a string from one charset to another, pivoting through UTF-8.
 *
 * Charset names are lowercased, stripped of trailing //TRANSLIT and //IGNORE
 * flags and resolved through the alias table. Only the flags found on the
 * output charset influence the conversion; flags on the input charset are
 * removed and otherwise ignored.
 *
 * @param string $inCharset  source charset; an empty name is treated as iso-8859-1
 * @param string $outCharset target charset, optionally suffixed with //TRANSLIT and/or //IGNORE;
 *                           an empty name is treated as iso-8859-1
 * @param mixed  $str        value to convert (cast to string first)
 *
 * @return string|false the converted string ('' for empty input), or false when a charset
 *                      map cannot be loaded or the input cannot be converted
 */
public static function iconv($inCharset, $outCharset, $str)
{
    $str = (string) $str;
    if ('' === $str) {
        return '';
    }

    // Flags requested through //TRANSLIT and //IGNORE on the output charset.
    $translit = '';
    $ignore = '';

    $outCharset = strtolower($outCharset);
    $inCharset = strtolower($inCharset);

    if ('' === $outCharset) {
        $outCharset = 'iso-8859-1';
    }
    if ('' === $inCharset) {
        $inCharset = 'iso-8859-1';
    }

    // Peel flags off the output charset; they may be stacked in any order.
    for ($again = true; $again;) {
        $again = false;

        if ('//translit' === substr($outCharset, -10)) {
            $outCharset = substr($outCharset, 0, -10);
            $translit = true;
            $again = true;
        }

        if ('//ignore' === substr($outCharset, -8)) {
            $outCharset = substr($outCharset, 0, -8);
            $ignore = true;
            $again = true;
        }
    }

    // Same peeling for the input charset, but the flags are simply dropped.
    for ($again = true; $again;) {
        $again = false;

        if ('//translit' === substr($inCharset, -10)) {
            $inCharset = substr($inCharset, 0, -10);
            $again = true;
        }

        if ('//ignore' === substr($inCharset, -8)) {
            $inCharset = substr($inCharset, 0, -8);
            $again = true;
        }
    }

    $inCharset = self::$alias[$inCharset] ?? $inCharset;
    $outCharset = self::$alias[$outCharset] ?? $outCharset;

    // UTF-8 needs no map; every other charset must have a loadable one.
    $supported = ('utf-8' === $inCharset || self::loadMap('from.', $inCharset, $inMap))
        && ('utf-8' === $outCharset || self::loadMap('to.', $outCharset, $outMap));

    if (!$supported) {
        trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

        return false;
    }

    if ('utf-8' === $inCharset) {
        self::$isValidUtf8 = preg_match('//u', $str);

        if (!self::$isValidUtf8 && !$ignore) {
            trigger_error(self::ERROR_ILLEGAL_CHARACTER);

            return false;
        }

        if ('utf-8' === $outCharset) {
            // UTF-8 to UTF-8: validate, dropping bad bytes when //IGNORE was given.
            $str = self::utf8ToUtf8($str, $ignore);
        }
    } else {
        // Bring the input into UTF-8 through the source charset map.
        $utf8 = '';
        $str = self::mapToUtf8($utf8, $inMap, $str, $ignore) ? $utf8 : false;
        self::$isValidUtf8 = true;
    }

    if ('utf-8' === $outCharset || false === $str) {
        return $str;
    }

    // Map the UTF-8 pivot string into the target charset.
    $converted = '';

    return self::mapFromUtf8($converted, $outMap, $str, $ignore, $translit) ? $converted : false;
}
||
229 | |||
/**
 * Decodes every MIME header of a message header block.
 *
 * Line endings are normalised to "\n", everything after the first blank line
 * is discarded, and each (possibly folded) header is decoded with
 * iconv_mime_decode(). Lines without a ':' are skipped.
 *
 * @param string      $str     raw header block
 * @param int         $mode    bitmask forwarded to iconv_mime_decode()
 * @param string|null $charset target charset; null selects the internal encoding
 *
 * @return array|false map of header name to value (a list of values when a name
 *                     repeats), or false as soon as one header fails to decode
 */
public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
{
    $charset = $charset ?? self::$internalEncoding;

    if (false !== strpos($str, "\r")) {
        $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
    }

    // Keep only what precedes the first empty line.
    $headerBlock = explode("\n\n", $str, 2)[0];

    $headers = [];

    // A new header starts at every newline not followed by folding whitespace.
    foreach (preg_split('/\n(?![ \t])/', $headerBlock) as $rawHeader) {
        $decoded = self::iconv_mime_decode($rawHeader, $mode, $charset);
        if (false === $decoded) {
            return false;
        }

        $pieces = explode(':', $decoded, 2);
        if (2 !== \count($pieces)) {
            continue;
        }

        [$name, $value] = $pieces;
        $value = ltrim($value);

        if (!isset($headers[$name])) {
            $headers[$name] = $value;
            continue;
        }

        // Repeated header: promote the stored scalar to a list, then append.
        if (!\is_array($headers[$name])) {
            $headers[$name] = [$headers[$name]];
        }
        $headers[$name][] = $value;
    }

    return $headers;
}
||
265 | |||
/**
 * Decodes a single MIME header field containing "=?charset?B|Q?data?=" encoded words.
 *
 * Only the first header of $str is considered; folded continuation lines are
 * joined with a single space before decoding.
 *
 * @param string      $str     header line (possibly folded)
 * @param int         $mode    bitmask; with ICONV_MIME_DECODE_CONTINUE_ON_ERROR set,
 *                             "//IGNORE" is appended to the target charset and encoded words
 *                             in an unknown charset are copied through verbatim
 * @param string|null $charset target charset; null selects the internal encoding
 *
 * @return string|false the decoded header, or false on failure
 */
public static function iconv_mime_decode($str, $mode = 0, $charset = null)
{
    if (null === $charset) {
        $charset = self::$internalEncoding;
    }

    $lenient = \ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode;
    if ($lenient) {
        $charset .= '//IGNORE';
    }

    if (false !== strpos($str, "\r")) {
        $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
    }

    $firstHeader = preg_split('/\n(?![ \t])/', rtrim($str), 2)[0];
    $unfolded = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($firstHeader));

    // Layout: [text, charset, scheme, data, text, charset, scheme, data, text, ...]
    $parts = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $unfolded, -1, \PREG_SPLIT_DELIM_CAPTURE);

    $result = self::iconv('utf-8', $charset, $parts[0]);
    if (false === $result) {
        return false;
    }

    for ($i = 1, $count = \count($parts); $i < $count; $i += 4) {
        $wordCharset = strtolower($parts[$i]);

        if ($lenient
            && 'utf-8' !== $wordCharset
            && !isset(self::$alias[$wordCharset])
            && !self::loadMap('from.', $wordCharset, $payload)) {
            // Unknown charset in lenient mode: handled below as a pass-through.
            $payload = false;
        } elseif ('B' === strtoupper($parts[$i + 1])) {
            $payload = base64_decode($parts[$i + 2]);
        } else {
            // Q scheme: "_" stands for a space and "=XX" for a byte; literal "%" is protected first.
            $payload = rawurldecode(strtr(str_replace('%', '%25', $parts[$i + 2]), '=_', '% '));
        }

        if (false === $payload) {
            if ($lenient) {
                $result .= "=?{$parts[$i]}?{$parts[$i + 1]}?{$parts[$i + 2]}?={$parts[$i + 3]}";
                continue;
            }

            $result = false;
            break;
        }

        if ('' !== $payload) {
            $payload = self::iconv($wordCharset, $charset, $payload);
            if ('' === $payload) {
                // The word converted to nothing: drop the first byte of the text that follows it.
                $parts[$i + 3] = substr($parts[$i + 3], 1);
            } else {
                $result .= $payload;
            }
        }

        // Plain text after the encoded word; whitespace-only separators are not emitted.
        $trailing = self::iconv('utf-8', $charset, $parts[$i + 3]);
        if ('' !== trim($trailing)) {
            $result .= $trailing;
        }
    }

    return $result;
}
||
327 | |||
/**
 * Returns the configured encodings.
 *
 * @param string $type 'input_encoding', 'output_encoding' or 'internal_encoding';
 *                     any other value (including the default 'all') selects all three
 *
 * @return string|array the requested encoding, or an array with all three keys
 */
public static function iconv_get_encoding($type = 'all')
{
    $settings = [
        'input_encoding' => self::$inputEncoding,
        'output_encoding' => self::$outputEncoding,
        'internal_encoding' => self::$internalEncoding,
    ];

    foreach ($settings as $name => $value) {
        // Loose comparison on purpose: it is what a switch statement applies.
        if ($type == $name) {
            return $value;
        }
    }

    return $settings;
}
||
342 | |||
/**
 * Changes one of the configured encodings.
 *
 * @param string $type    'input_encoding', 'output_encoding' or 'internal_encoding'
 * @param string $charset value to store; it is not validated here
 *
 * @return bool true when $type named a known setting, false otherwise
 */
public static function iconv_set_encoding($type, $charset)
{
    // Loose comparisons on purpose: they are what a switch statement applies.
    if ('input_encoding' == $type) {
        self::$inputEncoding = $charset;

        return true;
    }

    if ('output_encoding' == $type) {
        self::$outputEncoding = $charset;

        return true;
    }

    if ('internal_encoding' == $type) {
        self::$internalEncoding = $charset;

        return true;
    }

    return false;
}
||
354 | |||
/**
 * Builds a MIME header line "<name>: =?charset?scheme?data?=" and folds it into several encoded words.
 *
 * Recognised $pref keys and their defaults: 'scheme' ('B'; only its first
 * letter is used, 'Q' selects quoted-printable and anything else base64),
 * 'input-charset' and 'output-charset' (the internal encoding),
 * 'line-length' (76) and 'line-break-chars' ("\r\n").
 *
 * @param string     $fieldName  header name; replaced by '' when it contains bytes >= 0x80
 * @param string     $fieldValue header value, expressed in the input charset
 * @param array|null $pref       preferences; non-array values are treated as an empty array
 *
 * @return string|false the encoded header, or false when a charset conversion fails
 */
public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
{
    $defaults = [
        'scheme' => 'B',
        'input-charset' => self::$internalEncoding,
        'output-charset' => self::$internalEncoding,
        'line-length' => 76,
        'line-break-chars' => "\r\n",
    ];
    $pref = (\is_array($pref) ? $pref : []) + $defaults;

    if (preg_match('/[\x80-\xFF]/', $fieldName)) {
        $fieldName = '';
    }

    $scheme = strtoupper(substr($pref['scheme'], 0, 1));
    $in = strtolower($pref['input-charset']);
    $out = strtolower($pref['output-charset']);

    if ('utf-8' !== $in) {
        $fieldValue = self::iconv($in, 'utf-8', $fieldValue);
        if (false === $fieldValue) {
            return false;
        }
    }

    // Split into UTF-8 characters so that no multi-byte character straddles two encoded words.
    preg_match_all('/./us', $fieldValue, $matches);
    $chars = $matches[0] ?? [];

    $lineBreak = (int) $pref['line-length'];
    $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
    $lineLength = \strlen($fieldName) + 2 + \strlen($lineStart) + 2;
    $lineOffset = \strlen($lineStart) + 3;
    $lineData = '';

    $lines = [];
    $isQ = 'Q' === $scheme;

    foreach ($chars as $c) {
        if ('utf-8' !== $out) {
            $c = self::iconv('utf-8', $out, $c);
            if (false === $c) {
                return false;
            }
        }

        if ($isQ) {
            // Quoted-printable: escape the character now and probe its encoded form.
            $c = preg_replace_callback(
                '/[=_\?\x00-\x1F\x80-\xFF]/',
                [__CLASS__, 'qpByteCallback'],
                $c
            );
            $probe = $c;
        } else {
            // Base64: probe the encoding of the pending data plus this character.
            $probe = base64_encode($lineData.$c);
        }

        // The probe reaching this offset means the current word is full: flush it.
        if (isset($probe[$lineBreak - $lineLength])) {
            $lines[] = $lineStart.($isQ ? $lineData : base64_encode($lineData)).'?=';
            $lineLength = $lineOffset;
            $lineData = '';
        }

        $lineData .= $c;
        if ($isQ) {
            $lineLength += \strlen($c);
        }
    }

    if ('' !== $lineData) {
        $lines[] = $lineStart.($isQ ? $lineData : base64_encode($lineData)).'?=';
    }

    return $fieldName.': '.implode($pref['line-break-chars'].' ', $lines);
}
||
430 | |||
431 | public static function iconv_strlen($s, $encoding = null) |
||
443 | } |
||
444 | |||
/**
 * Counts the characters of a string using utf8_decode().
 *
 * utf8_decode() yields one byte per UTF-8 character, so the byte length of
 * its result is the character count.
 *
 * @param string      $s        string to measure
 * @param string|null $encoding charset of $s; null selects the internal encoding
 *
 * @return int|false the character count, or false when $s cannot be converted to UTF-8
 */
public static function strlen1($s, $encoding = null)
{
    $encoding = $encoding ?? self::$internalEncoding;

    // Anything whose name does not start with "utf-8" is converted first.
    if (0 !== stripos($encoding, 'utf-8')) {
        $s = self::iconv($encoding, 'utf-8', $s);
        if (false === $s) {
            return false;
        }
    }

    return \strlen(utf8_decode($s));
}
||
456 | |||
/**
 * Counts the characters of a string by hopping from lead byte to lead byte.
 *
 * @param string      $s        string to measure
 * @param string|null $encoding charset of $s; null selects the internal encoding
 *
 * @return int|false the character count, or false when $s cannot be converted to UTF-8
 */
public static function strlen2($s, $encoding = null)
{
    $encoding = $encoding ?? self::$internalEncoding;

    if (0 !== stripos($encoding, 'utf-8')) {
        $s = self::iconv($encoding, 'utf-8', $s);
        if (false === $s) {
            return false;
        }
    }

    $lengthByLead = self::$ulenMask;
    $count = 0;

    // Each step skips one whole sequence; bytes with no table entry advance by one.
    for ($offset = 0, $bytes = \strlen($s); $offset < $bytes; ++$count) {
        $offset += $lengthByLead[$s[$offset] & "\xF0"] ?? 1;
    }

    return $count;
}
||
480 | |||
/**
 * Finds the character position of the first occurrence of $needle in $haystack.
 *
 * @param string      $haystack string to search in
 * @param string      $needle   string to search for
 * @param int         $offset   character offset at which the search starts (cast to int)
 * @param string|null $encoding charset of both strings; null selects the internal encoding
 *
 * @return int|false the position in characters, or false when not found or on conversion failure
 */
public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
{
    $encoding = $encoding ?? self::$internalEncoding;

    if (0 !== stripos($encoding, 'utf-8')) {
        $haystack = self::iconv($encoding, 'utf-8', $haystack);
        if (false === $haystack) {
            return false;
        }

        $needle = self::iconv($encoding, 'utf-8', $needle);
        if (false === $needle) {
            return false;
        }
    }

    $offset = (int) $offset;
    if (0 !== $offset) {
        // Search only in the part of the haystack that starts at the offset.
        $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
    }

    $bytePos = strpos($haystack, $needle);
    if (false === $bytePos) {
        return false;
    }
    if (0 === $bytePos) {
        return $offset;
    }

    // Turn the byte position into a character position, then re-apply the offset.
    return $offset + self::iconv_strlen(substr($haystack, 0, $bytePos), 'utf-8');
}
||
503 | |||
/**
 * Finds the character position of the last occurrence of $needle in $haystack.
 *
 * @param string      $haystack string to search in
 * @param string      $needle   string to search for; an empty needle is never found
 * @param string|null $encoding charset of both strings; null selects the internal encoding
 *
 * @return int|false the position in characters, or false when not found or on conversion failure
 */
public static function iconv_strrpos($haystack, $needle, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }

    if (0 !== stripos($encoding, 'utf-8')) {
        if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
            return false;
        }
        if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
            return false;
        }
    }

    $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

    if (false === $pos) {
        return false;
    }

    // A match at byte 0 is at character 0. The previous code measured the whole
    // haystack in that case and returned its length instead of 0.
    if (0 === $pos) {
        return 0;
    }

    // Convert the byte offset of the match into a character offset.
    return self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8');
}
||
523 | |||
/**
 * Extracts part of a string, counting in characters rather than bytes.
 *
 * The string is converted to UTF-8 when needed, cut with a regular
 * expression, and converted back to the original charset.
 *
 * @param string      $s        input string
 * @param int         $start    first character to keep; negative values count from the end
 * @param int         $length   maximum number of characters; negative values stop that many
 *                              characters before the end
 * @param string|null $encoding charset of $s; null selects the internal encoding
 *
 * @return string|false the extracted part; false when a charset conversion fails. For
 *                      out-of-range arguments the result is false before PHP 8 and '' from PHP 8 on
 */
public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }

    // Fixed: this test used to be inverted, so non-UTF-8 input was never converted
    // (in either direction) while UTF-8 input was needlessly round-tripped.
    if (0 === stripos($encoding, 'utf-8')) {
        // Already UTF-8: null marks "no conversion back" for the end of the method.
        $encoding = null;
    } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
        return false;
    }

    $s = (string) $s;
    $slen = self::iconv_strlen($s, 'utf-8');
    $start = (int) $start;

    if (0 > $start) {
        $start += $slen;
    }
    if (0 > $start) {
        // Still before the beginning: an error before PHP 8, clamped to 0 afterwards.
        if (\PHP_VERSION_ID < 80000) {
            return false;
        }

        $start = 0;
    }
    if ($start >= $slen) {
        return \PHP_VERSION_ID >= 80000 ? '' : false;
    }

    // Number of characters available from $start to the end of the string.
    $remaining = $slen - $start;

    if (0 > $length) {
        $length += $remaining;
    }
    if (0 === $length) {
        return '';
    }
    if (0 > $length) {
        return \PHP_VERSION_ID >= 80000 ? '' : false;
    }

    if ($length > $remaining) {
        $length = $remaining;
    }

    // pregOffset() supplies the pattern fragments; presumably each matches the given
    // number of characters (its body is not visible here - confirm).
    $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/u';

    $s = preg_match($rx, $s, $match) ? $match[1] : '';

    if (null === $encoding) {
        return $s;
    }

    return self::iconv('utf-8', $encoding, $s);
}
||
579 | |||
/**
 * Fetches a charset conversion map, caching it in self::$convertMap.
 *
 * When no 'to.' map exists for a charset, it is derived by flipping the
 * corresponding 'from.' map.
 *
 * @param string $type    'from.' or 'to.'
 * @param string $charset canonical charset name
 * @param mixed  $map     receives the map (left at false when nothing could be loaded)
 *
 * @return bool true when $map was filled, false otherwise
 */
private static function loadMap($type, $charset, &$map)
{
    $key = $type.$charset;

    if (isset(self::$convertMap[$key])) {
        $map = self::$convertMap[$key];

        return true;
    }

    $map = self::getData($key);

    if (false === $map) {
        // No dedicated map: only a 'to.' map can be rebuilt, from its 'from.' twin.
        if ('to.' !== $type || !self::loadMap('from.', $charset, $map)) {
            return false;
        }

        $map = array_flip($map);
    }

    self::$convertMap[$key] = $map;

    return true;
}
||
598 | |||
/**
 * Copies a UTF-8 string while checking every multi-byte sequence.
 *
 * Per-sequence regex validation is skipped when self::$isValidUtf8 is truthy.
 *
 * @param string $str    input, expected to be UTF-8
 * @param mixed  $ignore truthy to skip invalid bytes instead of failing
 *
 * @return string|false the checked string, or false (after trigger_error()) on an
 *                      invalid sequence when $ignore is falsy
 */
private static function utf8ToUtf8($str, $ignore)
{
    $lengthByLead = self::$ulenMask;
    $knownValid = self::$isValidUtf8;

    // The output is written in place over a copy of the input; it can only be shorter.
    $out = $str;
    $read = 0;
    $written = 0;
    $total = \strlen($str);

    while ($read < $total) {
        if ($str[$read] < "\x80") {
            // ASCII byte: copied as is.
            $out[$written++] = $str[$read++];
            continue;
        }

        $seqLen = $lengthByLead[$str[$read] & "\xF0"] ?? 1;
        $seq = substr($str, $read, $seqLen);

        // A length of 1 here means a byte >= 0x80 that is not a lead byte.
        $wellFormed = 1 !== $seqLen && ($knownValid || preg_match('/^.$/us', $seq));

        if (!$wellFormed) {
            if ($ignore) {
                ++$read;
                continue;
            }

            trigger_error(self::ERROR_ILLEGAL_CHARACTER);

            return false;
        }

        $read += $seqLen;

        for ($k = 0, $n = \strlen($seq); $k < $n; ++$k) {
            $out[$written++] = $seq[$k];
        }
    }

    return substr($out, 0, $written);
}
||
639 | |||
/**
 * Translates a string into UTF-8 using a charset map.
 *
 * At each position a two-byte key is tried before a one-byte key.
 *
 * @param string $result receives the output; translated text is appended to it
 * @param array  $map    map from source byte sequences to UTF-8 strings
 * @param string $str    input in the source charset
 * @param mixed  $ignore truthy to skip bytes that have no mapping instead of failing
 *
 * @return bool false (after trigger_error()) on an unmapped byte when $ignore is falsy, true otherwise
 */
private static function mapToUtf8(&$result, array $map, $str, $ignore)
{
    $total = \strlen($str);
    $i = 0;

    while ($i < $total) {
        $byte = $str[$i];

        if (isset($str[$i + 1], $map[$byte.$str[$i + 1]])) {
            $result .= $map[$byte.$str[$i + 1]];
            $i += 2;
            continue;
        }

        if (isset($map[$byte])) {
            $result .= $map[$byte];
        } elseif (!$ignore) {
            trigger_error(self::ERROR_ILLEGAL_CHARACTER);

            return false;
        }

        ++$i;
    }

    return true;
}
||
657 | |||
/**
 * Translates a UTF-8 string into another charset using a charset map.
 *
 * Characters missing from the map are, when transliteration is enabled,
 * replaced through the transliteration table or by the first byte of their
 * NFD decomposition when that byte is ASCII; the replacement is then pushed
 * back in front of the remaining input and processed like regular input.
 *
 * @param string $result   receives the output; translated text is appended to it
 * @param array  $map      map from UTF-8 characters to target byte sequences
 * @param string $str      UTF-8 input
 * @param mixed  $ignore   truthy to skip characters that cannot be converted
 * @param mixed  $translit truthy to attempt transliteration of unmapped characters
 *
 * @return bool false when a character cannot be converted and $ignore is falsy, true otherwise
 */
private static function mapFromUtf8(&$result, array $map, $str, $ignore, $translit)
{
    $lengthByLead = self::$ulenMask;
    $knownValid = self::$isValidUtf8;

    // Load the transliteration table on first use.
    if ($translit && !self::$translitMap) {
        self::$translitMap = self::getData('translit');
    }

    $pos = 0;
    $total = \strlen($str);

    while ($pos < $total) {
        if ($str[$pos] < "\x80") {
            $char = $str[$pos++];
        } else {
            $seqLen = $lengthByLead[$str[$pos] & "\xF0"] ?? 1;
            $char = substr($str, $pos, $seqLen);

            // With //IGNORE, malformed sequences are skipped one byte at a time.
            if ($ignore && (1 === $seqLen || !($knownValid || preg_match('/^.$/us', $char)))) {
                ++$pos;
                continue;
            }

            $pos += $seqLen;
        }

        if (isset($map[$char])) {
            $result .= $map[$char];
            continue;
        }

        if (!$translit) {
            if ($ignore) {
                continue;
            }

            return false;
        }

        if (isset(self::$translitMap[$char])) {
            $char = self::$translitMap[$char];
        } else {
            // Fall back to canonical decomposition and keep its leading ASCII byte, if any.
            $usable = $char >= "\xC3\x80";
            if ($usable) {
                $char = \Normalizer::normalize($char, \Normalizer::NFD);
                $usable = $char[0] < "\x80";
            }

            if (!$usable) {
                if ($ignore) {
                    continue;
                }

                return false;
            }

            $char = $char[0];
        }

        // Re-queue the replacement ahead of the unread input and restart from its first byte.
        $str = $char.substr($str, $pos);
        $total = \strlen($str);
        $pos = 0;
    }

    return true;
}
||
717 | |||
/**
 * preg_replace_callback() handler that quoted-printable-escapes one byte.
 *
 * The byte is always written as "=" followed by exactly two uppercase hex
 * digits. The previous dechex() form produced a single digit for bytes
 * below 0x10 (e.g. "=9" for a tab), which is not a valid escape.
 *
 * @param array $m regex match; $m[0] holds the byte to escape
 *
 * @return string the escaped byte, e.g. "=09" or "=C3"
 */
private static function qpByteCallback(array $m)
{
    return sprintf('=%02X', \ord($m[0]));
}
||
722 | |||
723 | private static function pregOffset($offset) |
||
734 | } |
||
735 | |||
736 | private static function getData($file) |
||
737 | { |
||
743 | } |
||
744 | } |
||
745 |