1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
11
|
|
|
|
12
|
|
|
namespace Symfony\Polyfill\Iconv; |
13
|
|
|
|
14
|
|
|
/**
 * iconv implementation in pure PHP, UTF-8 centric.
 *
 * Implemented:
 * - iconv                     - Convert string to requested character encoding
 * - iconv_mime_decode         - Decodes a MIME header field
 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
 * - iconv_get_encoding        - Retrieve internal configuration variables of iconv extension
 * - iconv_set_encoding        - Set current setting for character encoding conversion
 * - iconv_mime_encode         - Composes a MIME header field
 * - iconv_strlen              - Returns the character count of string
 * - iconv_strpos              - Finds position of first occurrence of a needle within a haystack
 * - iconv_strrpos             - Finds the last occurrence of a needle within a haystack
 * - iconv_substr              - Cut out part of a string
 *
 * Charsets available for conversion are defined by files
 * in the charset/ directory and by Iconv::$alias below.
 * You're welcome to send back any addition you make.
 *
 * @author Nicolas Grekas <p@tchwork.com>
 *
 * @internal
 */
37
|
|
|
final class Iconv |
38
|
|
|
{ |
39
|
|
|
public const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string'; |
40
|
|
|
public const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed'; |
41
|
|
|
|
42
|
|
|
public static $inputEncoding = 'utf-8'; |
43
|
|
|
public static $outputEncoding = 'utf-8'; |
44
|
|
|
public static $internalEncoding = 'utf-8'; |
45
|
|
|
|
46
|
|
|
private static $alias = [ |
47
|
|
|
'utf8' => 'utf-8', |
48
|
|
|
'ascii' => 'us-ascii', |
49
|
|
|
'tis-620' => 'iso-8859-11', |
50
|
|
|
'cp1250' => 'windows-1250', |
51
|
|
|
'cp1251' => 'windows-1251', |
52
|
|
|
'cp1252' => 'windows-1252', |
53
|
|
|
'cp1253' => 'windows-1253', |
54
|
|
|
'cp1254' => 'windows-1254', |
55
|
|
|
'cp1255' => 'windows-1255', |
56
|
|
|
'cp1256' => 'windows-1256', |
57
|
|
|
'cp1257' => 'windows-1257', |
58
|
|
|
'cp1258' => 'windows-1258', |
59
|
|
|
'shift-jis' => 'cp932', |
60
|
|
|
'shift_jis' => 'cp932', |
61
|
|
|
'latin1' => 'iso-8859-1', |
62
|
|
|
'latin2' => 'iso-8859-2', |
63
|
|
|
'latin3' => 'iso-8859-3', |
64
|
|
|
'latin4' => 'iso-8859-4', |
65
|
|
|
'latin5' => 'iso-8859-9', |
66
|
|
|
'latin6' => 'iso-8859-10', |
67
|
|
|
'latin7' => 'iso-8859-13', |
68
|
|
|
'latin8' => 'iso-8859-14', |
69
|
|
|
'latin9' => 'iso-8859-15', |
70
|
|
|
'latin10' => 'iso-8859-16', |
71
|
|
|
'iso8859-1' => 'iso-8859-1', |
72
|
|
|
'iso8859-2' => 'iso-8859-2', |
73
|
|
|
'iso8859-3' => 'iso-8859-3', |
74
|
|
|
'iso8859-4' => 'iso-8859-4', |
75
|
|
|
'iso8859-5' => 'iso-8859-5', |
76
|
|
|
'iso8859-6' => 'iso-8859-6', |
77
|
|
|
'iso8859-7' => 'iso-8859-7', |
78
|
|
|
'iso8859-8' => 'iso-8859-8', |
79
|
|
|
'iso8859-9' => 'iso-8859-9', |
80
|
|
|
'iso8859-10' => 'iso-8859-10', |
81
|
|
|
'iso8859-11' => 'iso-8859-11', |
82
|
|
|
'iso8859-12' => 'iso-8859-12', |
83
|
|
|
'iso8859-13' => 'iso-8859-13', |
84
|
|
|
'iso8859-14' => 'iso-8859-14', |
85
|
|
|
'iso8859-15' => 'iso-8859-15', |
86
|
|
|
'iso8859-16' => 'iso-8859-16', |
87
|
|
|
'iso_8859-1' => 'iso-8859-1', |
88
|
|
|
'iso_8859-2' => 'iso-8859-2', |
89
|
|
|
'iso_8859-3' => 'iso-8859-3', |
90
|
|
|
'iso_8859-4' => 'iso-8859-4', |
91
|
|
|
'iso_8859-5' => 'iso-8859-5', |
92
|
|
|
'iso_8859-6' => 'iso-8859-6', |
93
|
|
|
'iso_8859-7' => 'iso-8859-7', |
94
|
|
|
'iso_8859-8' => 'iso-8859-8', |
95
|
|
|
'iso_8859-9' => 'iso-8859-9', |
96
|
|
|
'iso_8859-10' => 'iso-8859-10', |
97
|
|
|
'iso_8859-11' => 'iso-8859-11', |
98
|
|
|
'iso_8859-12' => 'iso-8859-12', |
99
|
|
|
'iso_8859-13' => 'iso-8859-13', |
100
|
|
|
'iso_8859-14' => 'iso-8859-14', |
101
|
|
|
'iso_8859-15' => 'iso-8859-15', |
102
|
|
|
'iso_8859-16' => 'iso-8859-16', |
103
|
|
|
'iso88591' => 'iso-8859-1', |
104
|
|
|
'iso88592' => 'iso-8859-2', |
105
|
|
|
'iso88593' => 'iso-8859-3', |
106
|
|
|
'iso88594' => 'iso-8859-4', |
107
|
|
|
'iso88595' => 'iso-8859-5', |
108
|
|
|
'iso88596' => 'iso-8859-6', |
109
|
|
|
'iso88597' => 'iso-8859-7', |
110
|
|
|
'iso88598' => 'iso-8859-8', |
111
|
|
|
'iso88599' => 'iso-8859-9', |
112
|
|
|
'iso885910' => 'iso-8859-10', |
113
|
|
|
'iso885911' => 'iso-8859-11', |
114
|
|
|
'iso885912' => 'iso-8859-12', |
115
|
|
|
'iso885913' => 'iso-8859-13', |
116
|
|
|
'iso885914' => 'iso-8859-14', |
117
|
|
|
'iso885915' => 'iso-8859-15', |
118
|
|
|
'iso885916' => 'iso-8859-16', |
119
|
|
|
]; |
120
|
|
|
private static $translitMap = []; |
121
|
|
|
private static $convertMap = []; |
122
|
|
|
private static $errorHandler; |
|
|
|
|
123
|
|
|
private static $lastError; |
|
|
|
|
124
|
|
|
|
125
|
|
|
private static $ulenMask = ["\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4]; |
126
|
|
|
private static $isValidUtf8; |
127
|
|
|
|
128
|
|
|
/**
 * Converts a string between two charsets, using UTF-8 as the pivot encoding.
 *
 * Charset names are lower-cased, an empty name falls back to "iso-8859-1",
 * trailing "//translit" / "//ignore" flags are stripped (they only take
 * effect when given on the output charset) and aliases are resolved through
 * self::$alias.
 *
 * @param string $inCharset  Source charset name
 * @param string $outCharset Target charset name, optionally suffixed with //TRANSLIT and/or //IGNORE
 * @param mixed  $str        Value to convert; cast to string
 *
 * @return string|false The converted string ('' for empty input), or false when a
 *                      charset map cannot be loaded or the conversion fails
 */
public static function iconv($inCharset, $outCharset, $str)
{
    $str = (string) $str;
    if ('' === $str) {
        return '';
    }

    $outCharset = strtolower($outCharset);
    $inCharset = strtolower($inCharset);

    if ('' === $outCharset) {
        $outCharset = 'iso-8859-1';
    }
    if ('' === $inCharset) {
        $inCharset = 'iso-8859-1';
    }

    // Peel the //TRANSLIT and //IGNORE flags off the output charset, in any order.
    $translit = $ignore = '';
    for (;;) {
        if ('//translit' === substr($outCharset, -10)) {
            $translit = true;
            $outCharset = substr($outCharset, 0, -10);
        } elseif ('//ignore' === substr($outCharset, -8)) {
            $ignore = true;
            $outCharset = substr($outCharset, 0, -8);
        } else {
            break;
        }
    }

    // The same flags are tolerated on the input charset but have no effect there.
    for (;;) {
        if ('//translit' === substr($inCharset, -10)) {
            $inCharset = substr($inCharset, 0, -10);
        } elseif ('//ignore' === substr($inCharset, -8)) {
            $inCharset = substr($inCharset, 0, -8);
        } else {
            break;
        }
    }

    $inCharset = self::$alias[$inCharset] ?? $inCharset;
    $outCharset = self::$alias[$outCharset] ?? $outCharset;

    $inIsUtf8 = 'utf-8' === $inCharset;
    $outIsUtf8 = 'utf-8' === $outCharset;

    // Load the charset maps; the output map is only attempted once the input map is available.
    $mapsLoaded = ($inIsUtf8 || self::loadMap('from.', $inCharset, $inMap))
        && ($outIsUtf8 || self::loadMap('to.', $outCharset, $outMap));

    if (!$mapsLoaded) {
        trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

        return false;
    }

    if ($inIsUtf8) {
        self::$isValidUtf8 = preg_match('//u', $str);

        if (!self::$isValidUtf8 && !$ignore) {
            trigger_error(self::ERROR_ILLEGAL_CHARACTER);

            return false;
        }

        if ($outIsUtf8) {
            // UTF-8 to UTF-8: only validates (and filters, with //IGNORE) the input.
            $str = self::utf8ToUtf8($str, $ignore);
        }
    } else {
        // Bring the input to the UTF-8 pivot.
        $result = '';
        $str = self::mapToUtf8($result, $inMap, $str, $ignore) ? $result : false;
        self::$isValidUtf8 = true;
    }

    if ($outIsUtf8 || false === $str) {
        return $str;
    }

    // Map the UTF-8 pivot to the requested output charset.
    $result = '';

    return self::mapFromUtf8($result, $outMap, $str, $ignore, $translit) ? $result : false;
}
229
|
|
|
|
230
|
|
|
/**
 * Decodes every MIME header field found in the header section of $str.
 *
 * Only the text before the first blank line is considered. Each field is
 * decoded with iconv_mime_decode() and split on its first colon; lines
 * without a colon are skipped. A field name seen more than once maps to a
 * list of its values.
 *
 * @param string      $str     Raw header block (any newline convention)
 * @param int         $mode    Bitmask forwarded to iconv_mime_decode()
 * @param string|null $charset Target charset; defaults to self::$internalEncoding
 *
 * @return array|false Map of field name to value (or list of values), false if a field fails to decode
 */
public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
{
    if (null === $charset) {
        $charset = self::$internalEncoding;
    }

    // Normalise CRLF and lone CR to LF.
    if (false !== strpos($str, "\r")) {
        $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
    }

    $headerSection = explode("\n\n", $str, 2)[0];
    $headers = [];

    // A newline followed by a space or tab is a folded continuation, not a new field.
    foreach (preg_split('/\n(?![ \t])/', $headerSection) as $rawField) {
        $decoded = self::iconv_mime_decode($rawField, $mode, $charset);
        if (false === $decoded) {
            return false;
        }

        $pair = explode(':', $decoded, 2);
        if (2 !== \count($pair)) {
            continue;
        }

        $name = $pair[0];
        $value = ltrim($pair[1]);

        if (!isset($headers[$name])) {
            $headers[$name] = $value;
            continue;
        }

        if (!\is_array($headers[$name])) {
            $headers[$name] = [$headers[$name]];
        }
        $headers[$name][] = $value;
    }

    return $headers;
}
265
|
|
|
|
266
|
|
|
/**
 * Decodes a single MIME header field.
 *
 * The first (possibly folded) field of $str is unfolded, split around its
 * "=?charset?B|Q?payload?=" encoded words, and every piece is converted to
 * $charset. With ICONV_MIME_DECODE_CONTINUE_ON_ERROR set in $mode, the target
 * charset gets the //IGNORE flag and encoded words whose charset has no
 * loadable map are copied through verbatim instead of aborting.
 *
 * @param string      $str     Raw header field
 * @param int         $mode    Bitmask; only ICONV_MIME_DECODE_CONTINUE_ON_ERROR is inspected
 * @param string|null $charset Target charset; defaults to self::$internalEncoding
 *
 * @return string|false The decoded field, or false on failure
 */
public static function iconv_mime_decode($str, $mode = 0, $charset = null)
{
    if (null === $charset) {
        $charset = self::$internalEncoding;
    }

    $continueOnError = \ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode;
    if ($continueOnError) {
        $charset .= '//IGNORE';
    }

    // Normalise CRLF and lone CR to LF.
    if (false !== strpos($str, "\r")) {
        $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
    }

    // Keep the first field only, then unfold its continuation lines.
    $firstField = preg_split('/\n(?![ \t])/', rtrim($str), 2)[0];
    $unfolded = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($firstField));

    // Layout: [text, charset, scheme, payload, text, charset, scheme, payload, text, ...]
    $parts = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $unfolded, -1, \PREG_SPLIT_DELIM_CAPTURE);

    $result = self::iconv('utf-8', $charset, $parts[0]);
    if (false === $result) {
        return false;
    }

    for ($i = 1, $count = \count($parts); $i < $count; $i += 4) {
        $wordCharset = strtolower($parts[$i]);

        if ($continueOnError
            && 'utf-8' !== $wordCharset
            && !isset(self::$alias[$wordCharset])
            && !self::loadMap('from.', $wordCharset, $payload)
        ) {
            // Unknown charset: the encoded word cannot be decoded.
            $payload = false;
        } elseif ('B' === strtoupper($parts[$i + 1])) {
            $payload = base64_decode($parts[$i + 2]);
        } else {
            // Q scheme: "=XX" is a hex escape and "_" stands for a space.
            $payload = rawurldecode(strtr(str_replace('%', '%25', $parts[$i + 2]), '=_', '% '));
        }

        if (false === $payload) {
            if (!$continueOnError) {
                return false;
            }

            $result .= "=?{$parts[$i]}?{$parts[$i + 1]}?{$parts[$i + 2]}?={$parts[$i + 3]}";
            continue;
        }

        if ('' !== $payload) {
            $converted = self::iconv($wordCharset, $charset, $payload);
            if ('' === $converted) {
                $parts[$i + 3] = substr($parts[$i + 3], 1);
            } else {
                $result .= $converted;
            }
        }

        // Text following the encoded word is dropped when it is whitespace only.
        $trailing = self::iconv('utf-8', $charset, $parts[$i + 3]);
        if ('' !== trim($trailing)) {
            $result .= $trailing;
        }
    }

    return $result;
}
327
|
|
|
|
328
|
|
|
/**
 * Returns one of the three encoding settings, or all of them.
 *
 * @param string $type 'input_encoding', 'output_encoding' or 'internal_encoding';
 *                     any other value yields the full map
 *
 * @return string|array The requested setting, or an array holding the three settings
 */
public static function iconv_get_encoding($type = 'all')
{
    $settings = [
        'input_encoding' => self::$inputEncoding,
        'output_encoding' => self::$outputEncoding,
        'internal_encoding' => self::$internalEncoding,
    ];

    foreach ($settings as $name => $value) {
        // Loose comparison on purpose: it mirrors the semantics of a switch statement.
        if ($type == $name) {
            return $value;
        }
    }

    return $settings;
}
342
|
|
|
|
343
|
|
|
/**
 * Stores a charset name in one of the three encoding settings.
 *
 * @param string $type    'input_encoding', 'output_encoding' or 'internal_encoding'
 * @param string $charset Charset name to store (stored as given, not validated)
 *
 * @return bool True when $type names a known setting, false otherwise
 */
public static function iconv_set_encoding($type, $charset)
{
    // Loose comparisons on purpose: they mirror the semantics of a switch statement.
    if ($type == 'input_encoding') {
        self::$inputEncoding = $charset;
    } elseif ($type == 'output_encoding') {
        self::$outputEncoding = $charset;
    } elseif ($type == 'internal_encoding') {
        self::$internalEncoding = $charset;
    } else {
        return false;
    }

    return true;
}
354
|
|
|
|
355
|
|
|
/**
 * Composes a MIME header field made of one or more encoded words.
 *
 * Recognised $pref keys and their defaults: 'scheme' ('B'), 'input-charset'
 * and 'output-charset' (both self::$internalEncoding), 'line-length' (76)
 * and 'line-break-chars' ("\r\n"). The value is processed one character at
 * a time so that an encoded word never splits a character.
 *
 * @param string     $fieldName  Field name; replaced by '' if it contains a byte >= 0x80
 * @param string     $fieldValue Field value, expressed in the input charset
 * @param array|null $pref       Encoding preferences; non-array values are ignored
 *
 * @return string|false "Name: encoded-words", or false when a charset conversion fails
 */
public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
{
    $pref = (\is_array($pref) ? $pref : []) + [
        'scheme' => 'B',
        'input-charset' => self::$internalEncoding,
        'output-charset' => self::$internalEncoding,
        'line-length' => 76,
        'line-break-chars' => "\r\n",
    ];

    if (preg_match('/[\x80-\xFF]/', $fieldName)) {
        $fieldName = '';
    }

    $scheme = strtoupper(substr($pref['scheme'], 0, 1));
    $useQ = 'Q' === $scheme;
    $in = strtolower($pref['input-charset']);
    $out = strtolower($pref['output-charset']);

    if ('utf-8' !== $in) {
        $fieldValue = self::iconv($in, 'utf-8', $fieldValue);
        if (false === $fieldValue) {
            return false;
        }
    }

    // Split the UTF-8 value into individual characters.
    preg_match_all('/./us', $fieldValue, $matches);
    $chars = $matches[0] ?? [];

    $maxLength = (int) $pref['line-length'];
    $wordStart = "=?{$pref['output-charset']}?{$scheme}?";
    $lineLength = \strlen($fieldName) + 2 + \strlen($wordStart) + 2;
    $continuationLength = \strlen($wordStart) + 3;
    $lineData = '';
    $words = [];

    foreach ($chars as $c) {
        if ('utf-8' !== $out) {
            $c = self::iconv('utf-8', $out, $c);
            if (false === $c) {
                return false;
            }
        }

        if ($useQ) {
            // Escape "=", "_", "?", control bytes and bytes >= 0x80.
            $c = preg_replace_callback(
                '/[=_\?\x00-\x1F\x80-\xFF]/',
                [__CLASS__, 'qpByteCallback'],
                $c
            );
            $probe = $c;
        } else {
            $probe = base64_encode($lineData.$c);
        }

        // Flush the current word when the probe reaches past the remaining room on the line.
        if (isset($probe[$maxLength - $lineLength])) {
            $words[] = $wordStart.($useQ ? $lineData : base64_encode($lineData)).'?=';
            $lineLength = $continuationLength;
            $lineData = '';
        }

        $lineData .= $c;
        if ($useQ) {
            $lineLength += \strlen($c);
        }
    }

    if ('' !== $lineData) {
        $words[] = $wordStart.($useQ ? $lineData : base64_encode($lineData)).'?=';
    }

    return $fieldName.': '.implode($pref['line-break-chars'].' ', $words);
}
430
|
|
|
|
431
|
|
|
/**
 * Returns the number of characters in $s.
 *
 * Delegates to strlen1() when the "xml" extension is loaded and to
 * strlen2() otherwise; the extension check is performed once and cached.
 *
 * @param string      $s        Input string
 * @param string|null $encoding Charset of $s; null is forwarded as-is to the delegate
 *
 * @return int|false Character count, or false when the delegate fails
 */
public static function iconv_strlen($s, $encoding = null)
{
    static $hasXml = null;
    $hasXml = $hasXml ?? \extension_loaded('xml');

    return $hasXml ? self::strlen1($s, $encoding) : self::strlen2($s, $encoding);
}
444
|
|
|
|
445
|
|
|
/**
 * Counts the characters of $s.
 *
 * Unless $encoding starts with "utf-8" (case-insensitive), $s is first
 * converted to UTF-8. The count is then the number of bytes that are not
 * UTF-8 continuation bytes (0x80-0xBF), which equals the number of code
 * points for well-formed UTF-8.
 *
 * This replaces the former strlen(utf8_decode($s)) computation: utf8_decode()
 * is deprecated as of PHP 8.2 and raised a deprecation notice on every call.
 *
 * @param string      $s        Input string
 * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
 *
 * @return int|false Character count, or false when the conversion to UTF-8 fails
 */
public static function strlen1($s, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }
    if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
        return false;
    }

    $s = (string) $s;

    // Each code point contributes exactly one byte outside the continuation range.
    return \strlen($s) - (int) preg_match_all('/[\x80-\xBF]/', $s);
}
456
|
|
|
|
457
|
|
|
/**
 * Counts the characters of $s by walking its UTF-8 lead bytes.
 *
 * Unless $encoding starts with "utf-8" (case-insensitive), $s is first
 * converted to UTF-8. The high nibble of each lead byte is looked up in
 * self::$ulenMask to find how many bytes to skip; unknown nibbles advance
 * by a single byte.
 *
 * @param string      $s        Input string
 * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
 *
 * @return int|false Character count, or false when the conversion to UTF-8 fails
 */
public static function strlen2($s, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }

    if (0 !== stripos($encoding, 'utf-8')) {
        $s = self::iconv($encoding, 'utf-8', $s);
        if (false === $s) {
            return false;
        }
    }

    $widths = self::$ulenMask;
    $count = 0;

    for ($pos = 0, $bytes = \strlen($s); $pos < $bytes; ++$count) {
        $pos += $widths[$s[$pos] & "\xF0"] ?? 1;
    }

    return $count;
}
480
|
|
|
|
481
|
|
|
/**
 * Finds the character position of the first occurrence of $needle in $haystack.
 *
 * Both strings are converted to UTF-8 unless $encoding starts with "utf-8"
 * (case-insensitive). The search then runs on bytes and the resulting byte
 * position is translated back into a character position.
 *
 * A negative $offset is counted from the end of the haystack. It used to be
 * added as-is to the position found in the truncated haystack, which produced
 * negative or otherwise wrong results; it is now normalised to an absolute
 * position first. An offset that reaches before the start of the string
 * yields false.
 *
 * @param string      $haystack String to search in
 * @param string      $needle   String to search for
 * @param int         $offset   Character offset where the search starts; negative counts from the end
 * @param string|null $encoding Charset of both strings; defaults to self::$internalEncoding
 *
 * @return int|false Character position of the match, or false if not found or on failure
 */
public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }

    if (0 !== stripos($encoding, 'utf-8')) {
        if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
            return false;
        }
        if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
            return false;
        }
    }

    $offset = (int) $offset;

    if (0 > $offset) {
        // Turn an end-relative offset into an absolute character position.
        $offset += self::iconv_strlen($haystack, 'utf-8');
        if (0 > $offset) {
            return false;
        }
    }

    if ($offset) {
        $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
        if (false === $haystack) {
            // Offset beyond the end of the string.
            return false;
        }
    }

    $pos = strpos($haystack, $needle);
    if (false === $pos) {
        return false;
    }

    // Convert the byte position within the (possibly truncated) haystack into characters.
    return $offset + ($pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0);
}
503
|
|
|
|
504
|
|
|
/**
 * Finds the character position of the last occurrence of $needle in $haystack.
 *
 * Both strings are converted to UTF-8 unless $encoding starts with "utf-8"
 * (case-insensitive). An empty needle never matches.
 *
 * Fix: when the last occurrence sits at the very start of the haystack the
 * method used to return the length of the whole haystack instead of 0.
 *
 * @param string      $haystack String to search in
 * @param string      $needle   String to search for
 * @param string|null $encoding Charset of both strings; defaults to self::$internalEncoding
 *
 * @return int|false Character position of the match, or false if not found or on failure
 */
public static function iconv_strrpos($haystack, $needle, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }

    if (0 !== stripos($encoding, 'utf-8')) {
        if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
            return false;
        }
        if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
            return false;
        }
    }

    $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;
    if (false === $pos) {
        return false;
    }

    // Byte offset 0 is character offset 0; otherwise count the characters preceding the match.
    return $pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0;
}
523
|
|
|
|
524
|
|
|
/**
 * Extracts a substring, counting in characters rather than bytes.
 *
 * The work is done on UTF-8: input in another charset is converted to UTF-8
 * first and the result is converted back at the end.
 *
 * Fixes over the previous implementation:
 * - the charset test was inverted, so non-UTF-8 input was sliced as if it
 *   were UTF-8 and never converted, while UTF-8 input went through a
 *   pointless round trip;
 * - the extraction pattern lacked the "s" modifier, so any requested range
 *   containing a newline made the match fail and yielded '';
 * - a null $length (PHP 8 meaning "until the end") was treated as 0.
 *
 * @param string      $s        Input string
 * @param int         $start    Character offset; negative counts from the end
 * @param int|null    $length   Maximum number of characters; negative stops that many
 *                              characters before the end; null means until the end
 * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
 *
 * @return string|false The substring; for out-of-range arguments '' on PHP >= 8 and
 *                      false before; false when a charset conversion fails
 */
public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
{
    if (null === $encoding) {
        $encoding = self::$internalEncoding;
    }
    if (0 === stripos($encoding, 'utf-8')) {
        // Already UTF-8: null marks that no conversion back is needed.
        $encoding = null;
    } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
        return false;
    }

    $s = (string) $s;
    $slen = self::iconv_strlen($s, 'utf-8');
    $start = (int) $start;
    $length = null === $length ? 2147483647 : (int) $length;

    if (0 > $start) {
        $start += $slen;
    }
    if (0 > $start) {
        if (\PHP_VERSION_ID < 80000) {
            return false;
        }

        $start = 0;
    }
    if ($start >= $slen) {
        return \PHP_VERSION_ID >= 80000 ? '' : false;
    }

    $remaining = $slen - $start;

    if (0 > $length) {
        $length += $remaining;
    }
    if (0 === $length) {
        return '';
    }
    if (0 > $length) {
        return \PHP_VERSION_ID >= 80000 ? '' : false;
    }

    if ($length > $remaining) {
        $length = $remaining;
    }

    // "s" lets "." match newlines; "u" makes "." match whole UTF-8 characters.
    $pattern = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/us';

    $s = preg_match($pattern, $s, $match) ? $match[1] : '';

    if (null === $encoding) {
        return $s;
    }

    return self::iconv('utf-8', $encoding, $s);
}
579
|
|
|
|
580
|
|
|
/**
 * Loads a charset conversion map, caching it in self::$convertMap.
 *
 * When no "to." map file exists for a charset, the map is derived by
 * flipping the corresponding "from." map.
 *
 * @param string $type    Map direction prefix: 'from.' or 'to.'
 * @param string $charset Charset name
 * @param mixed  $map     Receives the map on success
 *
 * @return bool True when the map is available, false otherwise
 */
private static function loadMap($type, $charset, &$map)
{
    $key = $type.$charset;

    if (isset(self::$convertMap[$key])) {
        $map = self::$convertMap[$key];

        return true;
    }

    $map = self::getData($key);

    if (false === $map) {
        // No dedicated file: an output map can still be built from the input map.
        if ('to.' !== $type || !self::loadMap('from.', $charset, $map)) {
            return false;
        }

        $map = array_flip($map);
    }

    self::$convertMap[$key] = $map;

    return true;
}
598
|
|
|
|
599
|
|
|
/**
 * Validates a UTF-8 string, optionally dropping the bytes that are not valid.
 *
 * The expected length of each multi-byte sequence is read from
 * self::$ulenMask using the high nibble of its lead byte. When
 * self::$isValidUtf8 is truthy the per-sequence regex check is skipped.
 *
 * @param string $str    String to validate
 * @param mixed  $ignore Truthy to skip offending bytes instead of failing
 *
 * @return string|false The validated (possibly filtered) string, or false
 *                      after triggering ERROR_ILLEGAL_CHARACTER
 */
private static function utf8ToUtf8($str, $ignore)
{
    $widths = self::$ulenMask;
    $trusted = self::$isValidUtf8;

    // Output is written in place over a copy of the input; the write cursor never overtakes the read cursor.
    $out = $str;
    $read = $write = 0;
    $size = \strlen($str);

    while ($read < $size) {
        $lead = $str[$read];

        if ($lead < "\x80") {
            $out[$write++] = $lead;
            ++$read;
            continue;
        }

        $width = $widths[$lead & "\xF0"] ?? 1;
        $sequence = substr($str, $read, $width);

        // A width of 1 here means the byte cannot start a multi-byte sequence.
        if (1 === $width || !($trusted || preg_match('/^.$/us', $sequence))) {
            if (!$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            ++$read;
            continue;
        }

        $read += $width;

        foreach (str_split($sequence) as $byte) {
            $out[$write++] = $byte;
        }
    }

    return substr($out, 0, $write);
}
639
|
|
|
|
640
|
|
|
/**
 * Converts a string to UTF-8 using a charset map.
 *
 * At each position a two-byte key is tried before a one-byte key.
 *
 * @param string $result Receives the converted text (appended to)
 * @param array  $map    Map from source byte sequences to UTF-8 strings
 * @param string $str    String to convert
 * @param mixed  $ignore Truthy to skip unmapped bytes instead of failing
 *
 * @return bool True on success, false after triggering ERROR_ILLEGAL_CHARACTER
 */
private static function mapToUtf8(&$result, array $map, $str, $ignore)
{
    $size = \strlen($str);
    $pos = 0;

    while ($pos < $size) {
        $byte = $str[$pos];

        if (isset($str[$pos + 1], $map[$byte.$str[$pos + 1]])) {
            $result .= $map[$byte.$str[$pos + 1]];
            $pos += 2;
            continue;
        }

        if (isset($map[$byte])) {
            $result .= $map[$byte];
        } elseif (!$ignore) {
            trigger_error(self::ERROR_ILLEGAL_CHARACTER);

            return false;
        }

        ++$pos;
    }

    return true;
}
657
|
|
|
|
658
|
|
|
/**
 * Converts a UTF-8 string to another charset using a charset map.
 *
 * Characters missing from the map are, when $translit is truthy, replaced
 * by their entry in the "translit" data file or by the leading ASCII
 * character of their NFD decomposition, and then re-processed. Characters
 * that still cannot be represented are skipped when $ignore is truthy and
 * make the conversion fail otherwise.
 *
 * Changes over the previous implementation:
 * - every failure now triggers ERROR_ILLEGAL_CHARACTER before returning
 *   false, consistently with mapToUtf8() and utf8ToUtf8();
 * - a false or empty result from Normalizer::normalize() is treated as
 *   "no transliteration" instead of being indexed.
 *
 * @param string $result   Receives the converted text (appended to)
 * @param array  $map      Map from UTF-8 characters to target byte sequences
 * @param string $str      UTF-8 string to convert
 * @param mixed  $ignore   Truthy to skip unconvertible characters
 * @param mixed  $translit Truthy to attempt transliteration
 *
 * @return bool True on success, false on failure
 */
private static function mapFromUtf8(&$result, array $map, $str, $ignore, $translit)
{
    $ulenMask = self::$ulenMask;
    $valid = self::$isValidUtf8;

    if ($translit && !self::$translitMap) {
        self::$translitMap = self::getData('translit');
    }

    $i = 0;
    $len = \strlen($str);

    while ($i < $len) {
        if ($str[$i] < "\x80") {
            $uchr = $str[$i++];
        } else {
            $ulen = $ulenMask[$str[$i] & "\xF0"] ?? 1;
            $uchr = substr($str, $i, $ulen);

            // With //IGNORE, bytes that do not form a valid sequence are dropped one at a time.
            if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                ++$i;
                continue;
            }

            $i += $ulen;
        }

        if (isset($map[$uchr])) {
            $result .= $map[$uchr];
            continue;
        }

        if ($translit) {
            if (isset(self::$translitMap[$uchr])) {
                $uchr = self::$translitMap[$uchr];
            } else {
                // Only characters from U+00C0 upwards are decomposed.
                $decomposed = $uchr >= "\xC3\x80" ? \Normalizer::normalize($uchr, \Normalizer::NFD) : false;

                if (\is_string($decomposed) && '' !== $decomposed && $decomposed[0] < "\x80") {
                    $uchr = $decomposed[0];
                } elseif ($ignore) {
                    continue;
                } else {
                    trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                    return false;
                }
            }

            // Re-process the replacement followed by the rest of the input.
            $str = $uchr.substr($str, $i);
            $len = \strlen($str);
            $i = 0;
            continue;
        }

        if (!$ignore) {
            trigger_error(self::ERROR_ILLEGAL_CHARACTER);

            return false;
        }
    }

    return true;
}
717
|
|
|
|
718
|
|
|
/**
 * preg_replace_callback() handler that escapes one byte for the Q scheme.
 *
 * The escape is always "=" followed by two upper-case hex digits. The
 * previous dechex()-based version emitted a single digit for bytes below
 * 0x10 (e.g. "=9" for a tab), which does not decode back to the same byte.
 *
 * @param array $m Regex match; $m[0] holds the byte to escape
 *
 * @return string The "=XX" escape sequence
 */
private static function qpByteCallback(array $m)
{
    return sprintf('=%02X', \ord($m[0]));
}
722
|
|
|
|
723
|
|
|
/**
 * Builds a regex fragment matching exactly $offset characters.
 *
 * Counts above 65535 are expressed as several consecutive ".{65535}"
 * groups followed by one group for the remainder, so that no single
 * quantifier exceeds 65535.
 *
 * @param int $offset Number of characters to match
 *
 * @return string The regex fragment, e.g. ".{3}"
 */
private static function pregOffset($offset)
{
    $offset = (int) $offset;

    // Number of full 65535-sized groups; the last group always keeps between 1 and 65535 characters.
    $fullGroups = $offset > 65535 ? intdiv($offset - 1, 65535) : 0;

    return str_repeat('.{65535}', $fullGroups).'.{'.($offset - 65535 * $fullGroups).'}';
}
735
|
|
|
|
736
|
|
|
/**
 * Loads a data file from the Resources/charset directory.
 *
 * The name ends up in a require'd path and can originate from untrusted
 * input (for instance the charset label of a MIME encoded word), so it is
 * restricted to letters, digits, ".", "_" and "-". Anything else, notably
 * path separators and NUL bytes, is rejected before touching the filesystem.
 *
 * @param string $file Base name of the data file, without directory or ".php" extension
 *
 * @return mixed The value returned by the file, or false when the name is
 *               rejected or the file does not exist
 */
private static function getData($file)
{
    if (!\is_string($file) || 1 !== preg_match('/^[A-Za-z0-9._-]+$/D', $file)) {
        return false;
    }

    if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
        return require $file;
    }

    return false;
}
744
|
|
|
} |
745
|
|
|
|