1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace voku\helper; |
6
|
|
|
|
7
|
|
|
/** |
8
|
|
|
* @psalm-immutable |
9
|
|
|
*/ |
10
|
|
|
final class UTF8 |
11
|
|
|
{ |
/**
 * Byte-order-mark byte sequence => length of that sequence in bytes.
 *
 * Every BOM is listed twice: once as its raw bytes, and once as the UTF-8
 * bytes that result when the raw BOM was mis-read as "WINDOWS-1252" and
 * re-encoded (one such char has [maybe] more than one byte ...).
 *
 * The mis-read variants are written as explicit byte escapes so the keys
 * cannot be damaged by editors or transcoding of this file.
 *
 * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
 *
 * @var array<string, int>
 */
private static $BOM = [
    "\xef\xbb\xbf"             => 3, // UTF-8 BOM
    "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" ("ï»¿")
    "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
    // NOTE(review): two NUL bytes assumed in front of "þÿ" so the key matches its declared 6 bytes - confirm
    "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
    "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
    // NOTE(review): two NUL bytes assumed after "ÿþ" so the key matches its declared 6 bytes - confirm
    "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
    "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
    "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252" ("þÿ")
    "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
    "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252" ("ÿþ")
];
31
|
|
|
|
/**
 * Whitespace lookup table: numeric (decimal) code point => UTF-8 bytes of that character.
 *
 * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
 *
 * @var array<int, string>
 */
private static $WHITESPACE = [
    0     => "\x00",         // U+0000 NULL Byte
    9     => "\x09",         // U+0009 Tab
    10    => "\x0A",         // U+000A New Line
    11    => "\x0B",         // U+000B Vertical Tab
    13    => "\x0D",         // U+000D Carriage Return
    32    => "\x20",         // U+0020 Ordinary Space
    160   => "\xC2\xA0",     // U+00A0 NO-BREAK SPACE
    5760  => "\xE1\x9A\x80", // U+1680 OGHAM SPACE MARK
    6158  => "\xE1\xA0\x8E", // U+180E MONGOLIAN VOWEL SEPARATOR
    8192  => "\xE2\x80\x80", // U+2000 EN QUAD
    8193  => "\xE2\x80\x81", // U+2001 EM QUAD
    8194  => "\xE2\x80\x82", // U+2002 EN SPACE
    8195  => "\xE2\x80\x83", // U+2003 EM SPACE
    8196  => "\xE2\x80\x84", // U+2004 THREE-PER-EM SPACE
    8197  => "\xE2\x80\x85", // U+2005 FOUR-PER-EM SPACE
    8198  => "\xE2\x80\x86", // U+2006 SIX-PER-EM SPACE
    8199  => "\xE2\x80\x87", // U+2007 FIGURE SPACE
    8200  => "\xE2\x80\x88", // U+2008 PUNCTUATION SPACE
    8201  => "\xE2\x80\x89", // U+2009 THIN SPACE
    8202  => "\xE2\x80\x8A", // U+200A HAIR SPACE
    8232  => "\xE2\x80\xA8", // U+2028 LINE SEPARATOR
    8233  => "\xE2\x80\xA9", // U+2029 PARAGRAPH SEPARATOR
    8239  => "\xE2\x80\xAF", // U+202F NARROW NO-BREAK SPACE
    8287  => "\xE2\x81\x9F", // U+205F MEDIUM MATHEMATICAL SPACE
    65440 => "\xEF\xBE\xA0", // U+FFA0 HALFWIDTH HANGUL FILLER
    12288 => "\xE3\x80\x80", // U+3000 IDEOGRAPHIC SPACE
];
93
|
|
|
|
/**
 * Whitespace lookup table: Unicode character name => UTF-8 bytes of that character.
 *
 * @var array<string, string>
 */
private static $WHITESPACE_TABLE = [
    'SPACE'                     => "\x20",         // U+0020
    'NO-BREAK SPACE'            => "\xC2\xA0",     // U+00A0
    'OGHAM SPACE MARK'          => "\xE1\x9A\x80", // U+1680
    'EN QUAD'                   => "\xE2\x80\x80", // U+2000
    'EM QUAD'                   => "\xE2\x80\x81", // U+2001
    'EN SPACE'                  => "\xE2\x80\x82", // U+2002
    'EM SPACE'                  => "\xE2\x80\x83", // U+2003
    'THREE-PER-EM SPACE'        => "\xE2\x80\x84", // U+2004
    'FOUR-PER-EM SPACE'         => "\xE2\x80\x85", // U+2005
    'SIX-PER-EM SPACE'          => "\xE2\x80\x86", // U+2006
    'FIGURE SPACE'              => "\xE2\x80\x87", // U+2007
    'PUNCTUATION SPACE'         => "\xE2\x80\x88", // U+2008
    'THIN SPACE'                => "\xE2\x80\x89", // U+2009
    'HAIR SPACE'                => "\xE2\x80\x8A", // U+200A
    'LINE SEPARATOR'            => "\xE2\x80\xA8", // U+2028
    'PARAGRAPH SEPARATOR'       => "\xE2\x80\xA9", // U+2029
    'ZERO WIDTH SPACE'          => "\xE2\x80\x8B", // U+200B
    'NARROW NO-BREAK SPACE'     => "\xE2\x80\xAF", // U+202F
    'MEDIUM MATHEMATICAL SPACE' => "\xE2\x81\x9F", // U+205F
    'IDEOGRAPHIC SPACE'         => "\xE3\x80\x80", // U+3000
    'HALFWIDTH HANGUL FILLER'   => "\xEF\xBE\xA0", // U+FFA0
];
120
|
|
|
|
/**
 * Two parallel lists of characters: the entry at position N of "upper"
 * is paired with the entry at position N of "lower".
 *
 * NOTE(review): presumably used as search/replace lists for case folding - confirm against the caller.
 *
 * @var array
 *
 * @phpstan-var array{upper: string[], lower: string[]}
 */
private static $COMMON_CASE_FOLD = [
    'upper' => [
        'µ',
        'ſ',
        "\xCD\x85",     // U+0345
        'ς',
        'ẞ',
        "\xCF\x90",     // U+03D0
        "\xCF\x91",     // U+03D1
        "\xCF\x95",     // U+03D5
        "\xCF\x96",     // U+03D6
        "\xCF\xB0",     // U+03F0
        "\xCF\xB1",     // U+03F1
        "\xCF\xB5",     // U+03F5
        "\xE1\xBA\x9B", // U+1E9B
        "\xE1\xBE\xBE", // U+1FBE
    ],
    'lower' => [
        'μ',
        's',
        'ι',
        'σ',
        'ß',
        'β',
        'θ',
        'φ',
        'π',
        'κ',
        'ρ',
        'ε',
        "\xE1\xB9\xA1", // U+1E61
        'ι',
    ],
];
160
|
|
|
|
/**
 * Environment feature flags, written by checkForSupport()
 * (keys such as "mbstring", "iconv", "intl", "intlChar", "ctype", "finfo", "json", "pcre_utf8").
 *
 * @var array
 *
 * @phpstan-var array<string, mixed>
 */
private static $SUPPORT = [];

/**
 * Null until populated.
 *
 * NOTE(review): presumably a map "broken UTF-8 sequence => fixed sequence" - confirm against the code that fills it.
 *
 * @var string[]|null
 *
 * @phpstan-var array<string, string>|null
 */
private static $BROKEN_UTF8_FIX;

/**
 * Null until populated.
 *
 * NOTE(review): presumably maps WINDOWS-1252 byte values to UTF-8 strings - confirm against the code that fills it.
 *
 * @var string[]|null
 *
 * @phpstan-var array<int, string>|null
 */
private static $WIN1252_TO_UTF8;

/**
 * Null until populated.
 *
 * @var string[]|null
 *
 * @phpstan-var array<int, string>|null
 */
private static $INTL_TRANSLITERATOR_LIST;

/**
 * Null until populated.
 *
 * @var string[]|null
 *
 * @phpstan-var array<string>|null
 */
private static $ENCODINGS;

/**
 * Null until populated.
 *
 * @var int[]|null
 *
 * @phpstan-var array<string, int>|null
 */
private static $ORD;

/**
 * Null until populated.
 *
 * @var string[]|null
 *
 * @phpstan-var array<string, string>|null
 */
private static $EMOJI;

/**
 * Null until populated.
 *
 * @var string[]|null
 *
 * @phpstan-var array<string>|null
 */
private static $EMOJI_VALUES_CACHE;

/**
 * Null until populated.
 *
 * @var string[]|null
 *
 * @phpstan-var array<string>|null
 */
private static $EMOJI_KEYS_CACHE;

/**
 * Null until populated.
 *
 * @var string[]|null
 *
 * @phpstan-var array<string>|null
 */
private static $EMOJI_KEYS_REVERSIBLE_CACHE;

/**
 * Integer-indexed character table; chr() loads it on first use via getData('chr').
 *
 * @var string[]|null
 *
 * @phpstan-var array<int, string>|null
 */
private static $CHR;
237
|
|
|
|
/**
 * Constructor without any behaviour: it takes no arguments and does nothing.
 */
public function __construct() {}
244
|
|
|
|
/**
 * Fetch the single character found at a given (zero-based) position,
 * i.e. a multi-byte aware "$str[$pos]".
 *
 * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
 *
 * @param string $str      <p>A UTF-8 string.</p>
 * @param int    $pos      <p>The position of character to return.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Single multi-byte character; an empty string for empty input or a negative position.</p>
 */
public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
{
    $has_usable_input = $str !== '' && $pos >= 0;
    if (!$has_usable_input) {
        return '';
    }

    // plain UTF-8 goes straight to mbstring, everything else through the class' own substr()
    $char = $encoding === 'UTF-8'
        ? \mb_substr($str, $pos, 1)
        : self::substr($str, $pos, 1, $encoding);

    return (string) $char;
}
271
|
|
|
|
/**
 * Put the UTF-8 BOM in front of a string, unless string_has_bom() reports
 * that the string already carries a BOM.
 *
 * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The output string that contains BOM.</p>
 */
public static function add_bom_to_string(string $str): string
{
    return self::string_has_bom($str)
        ? $str
        : self::bom() . $str;
}
294
|
|
|
|
/**
 * Changes the case of all keys in an array; the values are passed through untouched.
 *
 * If two keys become equal after the case change, the later entry overwrites the earlier one.
 *
 * @param array<string, mixed> $array    <p>The array to work on</p>
 * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
 *                                       or <strong>CASE_LOWER</strong> (default); any other value
 *                                       is treated as <strong>CASE_LOWER</strong>.</p>
 * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return array<string, mixed>
 *                              <p>An array with its keys lower- or uppercased.</p>
 */
public static function array_change_key_case(
    array $array,
    int $case = \CASE_LOWER,
    string $encoding = 'UTF-8'
): array {
    // only an explicit CASE_UPPER selects upper-casing, everything else means CASE_LOWER
    $to_upper = $case === \CASE_UPPER;

    $return = [];
    // iterate by value: nothing is written back, so a reference would only leave a dangling alias behind
    foreach ($array as $key => $value) {
        $key = $to_upper
            ? self::strtoupper($key, $encoding)
            : self::strtolower($key, $encoding);

        $return[$key] = $value;
    }

    return $return;
}
332
|
|
|
|
/**
 * Extract the text enclosed by two delimiters.
 *
 * The first occurrence of $start at or after $offset is located, then the
 * first occurrence of $end after it; the text in between is returned. An
 * empty string is returned when either delimiter is missing or when
 * nothing lies between them.
 *
 * @param string $str
 * @param string $start    <p>Delimiter marking the start of the substring.</p>
 * @param string $end      <p>Delimiter marking the end of the substring.</p>
 * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function between(
    string $str,
    string $start,
    string $end,
    int $offset = 0,
    string $encoding = 'UTF-8'
): string {
    if ($encoding !== 'UTF-8') {
        // generic path: every step goes through the encoding-aware helpers of this class
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $found_at = self::strpos($str, $start, $offset, $encoding);
        if ($found_at === false) {
            return '';
        }

        $content_from = $found_at + (int) self::strlen($start, $encoding);
        $content_to = self::strpos($str, $end, $content_from, $encoding);
        if ($content_to !== false && $content_to !== $content_from) {
            return (string) self::substr($str, $content_from, $content_to - $content_from, $encoding);
        }

        return '';
    }

    // UTF-8 path: talk to mbstring directly
    $found_at = \mb_strpos($str, $start, $offset);
    if ($found_at === false) {
        return '';
    }

    $content_from = $found_at + (int) \mb_strlen($start);
    $content_to = \mb_strpos($str, $end, $content_from);
    if ($content_to !== false && $content_to !== $content_from) {
        return (string) \mb_substr($str, $content_from, $content_to - $content_from);
    }

    return '';
}
398
|
|
|
|
/**
 * Convert binary into a string.
 *
 * The input is read as one big-endian bit string: leading zero bits are
 * dropped, the rest is left-padded to whole bytes and every group of
 * 8 bits becomes one byte of the result. Characters other than "0" and "1"
 * are ignored. Working bit-wise (instead of via base_convert()) keeps the
 * result exact for inputs of any length and for values whose first byte is
 * below 0x10.
 *
 * INFO: opposite to UTF8::str_to_binary()
 *
 * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
 *
 * @param string $bin 1|0
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The decoded bytes; an empty string for empty or all-zero input.</p>
 */
public static function binary_to_str($bin): string
{
    if (!isset($bin[0])) {
        return '';
    }

    // keep the binary digits only, then drop leading zero bits (an all-zero input yields '')
    $bits = \ltrim((string) \preg_replace('/[^01]/', '', (string) $bin), '0');
    if ($bits === '') {
        return '';
    }

    // left-pad to a whole number of bytes so that e.g. "1010" becomes "00001010" ("\n")
    $overhang = \strlen($bits) % 8;
    if ($overhang !== 0) {
        $bits = \str_repeat('0', 8 - $overhang) . $bits;
    }

    $str = '';
    foreach (\str_split($bits, 8) as $octet) {
        $str .= \chr((int) \bindec($octet));
    }

    return $str;
}
425
|
|
|
|
/**
 * Get the UTF-8 Byte Order Mark (the three bytes EF BB BF).
 *
 * INFO: take a look at UTF8::$bom for e.g. UTF-16 and UTF-32 BOM values
 *
 * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>UTF-8 Byte Order Mark.</p>
 */
public static function bom(): string
{
    $utf8_bom = "\xEF\xBB\xBF";

    return $utf8_bom;
}
442
|
|
|
|
/**
 * Alias of UTF8::chr_map(): both arguments are handed over unchanged.
 *
 * @param callable $callback
 * @param string   $str
 *
 * @psalm-pure
 *
 * @return string[]
 *
 * @see UTF8::chr_map()
 */
public static function callback($callback, string $str): array
{
    $mapped = self::chr_map($callback, $str);

    return $mapped;
}
459
|
|
|
|
/**
 * Get the character located at $index (indexes start at 0).
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $index    <p>Position of the character.</p>
 * @param string $encoding [optional] <p>Default is UTF-8</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The character at $index.</p>
 */
public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
{
    // UTF-8 is served by mbstring directly, other encodings by the class' own substr()
    $char = $encoding === 'UTF-8'
        ? \mb_substr($str, $index, 1)
        : self::substr($str, $index, 1, $encoding);

    return (string) $char;
}
480
|
|
|
|
/**
 * Split a string into its characters (delegates to str_split() with its default arguments).
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string[]
 *                  <p>An array of chars.</p>
 */
public static function chars(string $str): array
{
    /** @var string[] $chars */
    $chars = self::str_split($str);

    return $chars;
}
496
|
|
|
|
/**
 * Detect once which UTF-8 related extensions / features the server offers
 * and record the findings in self::$SUPPORT.
 *
 * The first call performs the detection and, when mbstring (or the symfony
 * polyfill) is in use, switches the mbstring internal encoding to UTF-8.
 * Every later call is a no-op.
 *
 * @return true|null
 *                   <p>True when the detection ran in this call, null when it already ran before.</p>
 *
 * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
 */
public static function checkForSupport()
{
    // detection already done -> nothing to do
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        return null;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // mbstring: http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
    if (self::$SUPPORT['mbstring'] === true) {
        \mb_internal_encoding('UTF-8');
        \mb_regex_encoding('UTF-8');
        self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
    }

    // iconv: http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // intl: http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // IntlChar: http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // ctype: http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // finfo: http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // json: http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // pcre: http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

    // the polyfill also provides mb_internal_encoding(), so set it here as well
    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
        \mb_internal_encoding('UTF-8');
        self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
    }

    return true;
}
551
|
|
|
|
/**
 * Generates a UTF-8 encoded character from the given code point.
 *
 * Code points up to U+007F are read from the single-byte table
 * self::$CHR; everything above is encoded via IntlChar when available,
 * otherwise by assembling the UTF-8 bytes by hand. Results are memoized
 * per code point and encoding.
 *
 * INFO: opposite to UTF8::ord()
 *
 * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
 *
 * @param int    $code_point <p>The code point for which to generate a character.</p>
 * @param string $encoding   [optional] <p>Default is UTF-8</p>
 *
 * @psalm-pure
 *
 * @return string|null
 *                     <p>Multi-byte character, returns null on failure or empty input
 *                     (non-integer input, a code point <= 0 or a code point above U+10FFFF).</p>
 */
public static function chr($code_point, string $encoding = 'UTF-8')
{
    // init
    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<string,string>
     */
    static $CHAR_CACHE = [];

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - is is only a warning
         */
        \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // U+10FFFF is the last Unicode code point; anything above would index past the byte table below
    if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
        return null;
    }

    $cache_key = $code_point . '_' . $encoding;
    if (isset($CHAR_CACHE[$cache_key])) {
        return $CHAR_CACHE[$cache_key];
    }

    // Only U+0001 - U+007F are single-byte in UTF-8. U+0080 must NOT be taken from the
    // byte table (the fallback below uses self::$CHR as a byte-value => byte lookup, so
    // entry 0x80 is a lone continuation byte, not a valid character).
    $is_single_byte = $code_point <= 0x7F;

    if (!$is_single_byte && self::$SUPPORT['intlChar'] === true) {
        //
        // fallback via "IntlChar"
        //
        $chr = \IntlChar::chr($code_point);
    } else {
        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        if ($is_single_byte) {
            // 0xxxxxxx
            $chr = self::$CHR[$code_point];
        } elseif ($code_point <= 0x7FF) {
            // 110xxxxx 10xxxxxx
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }
    }

    if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
    }

    return $CHAR_CACHE[$cache_key] = $chr;
}
675
|
|
|
|
/**
 * Run a callback on every character of a string and collect the results.
 *
 * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
 *
 * @param callable $callback <p>The callback function.</p>
 * @param string   $str      <p>UTF-8 string to run callback on.</p>
 *
 * @psalm-pure
 *
 * @return string[]
 *                  <p>The outcome of the callback, as array.</p>
 */
public static function chr_map($callback, string $str): array
{
    $chars = self::str_split($str);

    return \array_map($callback, $chars);
}
696
|
|
|
|
/**
 * Build a list with the byte length of every character of a Unicode string.
 *
 * 1 byte => U+0000  - U+007F
 * 2 byte => U+0080  - U+07FF
 * 3 byte => U+0800  - U+FFFF
 * 4 byte => U+10000 - U+10FFFF
 *
 * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
 *
 * @param string $str <p>The original unicode string.</p>
 *
 * @psalm-pure
 *
 * @return int[]
 *               <p>An array of byte lengths of each character.</p>
 */
public static function chr_size_list(string $str): array
{
    if ($str === '') {
        return [];
    }

    // pick the byte counter first, then apply it to every character
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $byte_counter = static function (string $data): int {
            // "mb_" is available if overload is used, so use it ...
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };
    } else {
        $byte_counter = '\strlen';
    }

    return \array_map($byte_counter, self::str_split($str));
}
732
|
|
|
|
/**
 * Get a decimal code representation of a specific character.
 *
 * With iconv available the character is converted to UCS-4LE and unpacked;
 * otherwise (or when iconv fails) the UTF-8 bytes are decoded by hand.
 * Only the first character of $char is taken into account.
 *
 * INFO: opposite to UTF8::decimal_to_chr()
 *
 * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
 *
 * @param string $char <p>The input character.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <p>The code point; 0 for an empty string.</p>
 */
public static function chr_to_decimal(string $char): int
{
    // nothing to decode: without this guard the iconv path ends in a TypeError
    // and the manual path reads an uninitialized string offset
    if ($char === '') {
        return 0;
    }

    if (self::$SUPPORT['iconv'] === true) {
        $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
        if ($chr_tmp !== false) {
            /** @phpstan-ignore-next-line - "unpack": only false if the format string contains errors */
            return \unpack('V', $chr_tmp)[1];
        }
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
        // 0xxxxxxx
        return $code;
    }

    if (($code & 0xe0) === 0xc0) {
        // 110xxxxx
        $bytes = 2;
        $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
        // 1110xxxx
        $bytes = 3;
        $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
        // 11110xxx
        $bytes = 4;
        $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; ++$i) {
        // truncated sequence: stop instead of reading past the end of the string
        if (!isset($char[$i - 1])) {
            break;
        }

        // 10xxxxxx
        $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
    }

    return $code;
}
785
|
|
|
|
/**
 * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
 *
 * The character is turned into its code point via ord() and then formatted
 * by int_to_hex() using the given prefix.
 *
 * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
 *
 * @param int|string $char   <p>The input character</p>
 * @param string     $prefix [optional]
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The code point encoded as U+xxxx; an empty string for empty input.</p>
 */
public static function chr_to_hex($char, string $prefix = 'U+'): string
{
    if ($char === '') {
        return '';
    }

    // the placeholder character is handed to ord() as an empty string
    $normalized = $char === '�' ? '' : (string) $char;

    return self::int_to_hex(self::ord($normalized), $prefix);
}
811
|
|
|
|
/**
 * Cut a string into chunks of at most $chunk_length characters and glue the
 * chunks together with $end (so $end sits between chunks, not after the last one).
 *
 * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
 *
 * @param string $body         <p>The original string to be split.</p>
 * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
 * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The chunked string.</p>
 */
public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
{
    $chunks = self::str_split($body, $chunk_length);

    return \implode($end, $chunks);
}
830
|
|
|
|
/**
 * Strip every byte that is not part of a well-formed UTF-8 sequence from a
 * string and, on request, run further clean-up passes.
 *
 * The passes run in a fixed order: invalid bytes, diamond question mark,
 * invisible characters, whitespace, MS Word characters, BOM.
 *
 * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef …” — 😃 - Düsseldorf'</code>
 *
 * @param string $str                                     <p>The string to be sanitized.</p>
 * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
 *                                                        UTF-BOM.</p>
 * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
 *                                                        whitespace.</p>
 * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
 *                                                        Word chars e.g.: "…" => "..."</p>
 * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
 *                                                        in combination with $normalize_whitespace</p>
 * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
 *                                                        question mark e.g.: "�"</p>
 * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
 *                                                        invisible characters e.g.: "\0"</p>
 * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove
 *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
 *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
 *                                                        </p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A clean UTF-8 encoded string.</p>
 */
public static function clean(
    string $str,
    bool $remove_bom = false,
    bool $normalize_whitespace = false,
    bool $normalize_msword = false,
    bool $keep_non_breaking_space = false,
    bool $replace_diamond_question_mark = false,
    bool $remove_invisible_characters = true,
    bool $remove_invisible_characters_url_encoded = false
): string {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures runs of well-formed sequences, groups 2 and 3 stray bytes;
    // replacing each match with group 1 therefore drops the stray bytes
    $well_formed_or_stray = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string) \preg_replace($well_formed_or_stray, '$1', $str);

    if ($replace_diamond_question_mark) {
        $cleaned = self::replace_diamond_question_mark($cleaned);
    }

    if ($remove_invisible_characters) {
        $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
    }

    if ($normalize_whitespace) {
        $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
        $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
        $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
}
910
|
|
|
|
/**
 * Repair and sanitize a string in one go: first fix_simple_utf8(), then
 * clean() with a fixed set of options.
 *
 * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef …” — 😃 - Düsseldorf'</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function cleanup($str): string
{
    $input = (string) $str;
    if ($input === '') {
        return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $repaired = self::fix_simple_utf8($input);

    return self::clean(
        $repaired,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // do not normalize MS Word chars
        true,  // ... but keep non-breaking-spaces
        true   // remove diamond question mark (�)
               // invisible characters (e.g. "\0") are removed by clean()'s default
    );
}
948
|
|
|
|
949
|
|
|
/**
 * Accepts a string or an array of strings and returns an array of Unicode code points.
 *
 * INFO: opposite to UTF8::string()
 *
 * EXAMPLE: <code>
 * UTF8::codepoints('κöñ'); // array(954, 246, 241)
 * // ... OR ...
 * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
 * </code>
 *
 * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
 * @param bool            $use_u_style <p>If true, code points are returned in U+xxxx format,
 *                                     otherwise (default) they are returned as integers.</p>
 *
 * @psalm-pure
 *
 * @return int[]|string[]
 *                        <p>
 *                        The array of code points:<br>
 *                        int[] for $use_u_style === false<br>
 *                        string[] for $use_u_style === true<br>
 *                        An empty array is returned for empty or non-string / non-array input.
 *                        </p>
 */
public static function codepoints($arg, bool $use_u_style = false): array
{
    // a plain string is first split into its single characters
    $chars = \is_string($arg) ? self::str_split($arg) : $arg;

    /**
     * @psalm-suppress DocblockTypeContradiction
     */
    if (!\is_array($chars) || $chars === []) {
        return [];
    }

    $code_points = \array_map([self::class, 'ord'], $chars);

    if (!$use_u_style) {
        return $code_points;
    }

    // convert every integer code point into its "U+xxxx" representation
    return \array_map([self::class, 'int_to_hex'], $code_points);
}
1010
|
|
|
|
1011
|
|
|
/**
 * Replace every run of whitespace characters (matched via the POSIX class
 * "[[:space:]]") with a single space and trim the result.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with trimmed $str and condensed whitespace.</p>
 */
public static function collapse_whitespace(string $str): string
{
    // prefer the mbstring implementation, otherwise use the class-internal regex helper
    $condensed = self::$SUPPORT['mbstring'] === true
        ? (string) \mb_ereg_replace('[[:space:]]+', ' ', $str)
        : self::regex_replace($str, '[[:space:]]+', ' ');

    return \trim($condensed);
}
1031
|
|
|
|
1032
|
|
|
/**
 * Returns the number of occurrences of every character used in a string.
 *
 * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
 *
 * @param string $str                     <p>The input string.</p>
 * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param bool   $try_to_use_mb_functions [optional] <p>Passed through unchanged to UTF8::str_split().</p>
 *
 * @psalm-pure
 *
 * @return int[]
 *               <p>An associative array with the characters as keys and
 *               their count as values.</p>
 */
public static function count_chars(
    string $str,
    bool $clean_utf8 = false,
    bool $try_to_use_mb_functions = true
): array {
    // split into single characters (chunk length 1) ...
    $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

    // ... and count how often each one appears
    return \array_count_values($characters);
}
1061
|
|
|
|
1062
|
|
|
/**
 * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
 *
 * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
 *
 * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
 *
 * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
 * @param string[] $filter     <p>Map of "search => replacement" strings applied via str_replace().</p>
 * @param bool     $strip_tags <p>If true, strip_tags() is applied to the string.</p>
 * @param bool     $strtolower <p>If true, strtolower() is applied to the string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @phpstan-param array<string,string> $filter
 */
public static function css_identifier(
    string $str = '',
    array $filter = [
        ' ' => '-',
        '/' => '-',
        '[' => '',
        ']' => '',
    ],
    bool $strip_tags = false,
    bool $strtolower = true
): string {
    // Fallback: an empty or whitespace-only input gets an auto-generated unique name,
    // everything else is cleaned first.
    $str = \trim($str) === ''
        ? \uniqid('auto-generated-css-class', true)
        : self::clean($str);

    if ($strip_tags) {
        $str = \strip_tags($str);
    }

    if ($strtolower) {
        $str = \strtolower($str);
    }

    // We could also use strtr() here but it is much slower than str_replace().
    // To keep '__' as '__' we temporarily swap it for the placeholder '##',
    // but only if '__' is not itself defined as a filter.
    $double_underscore_replacements = 0;
    if (!isset($filter['__'])) {
        $str = \str_replace('__', '##', $str, $double_underscore_replacements);
    }

    $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

    // Restore the placeholder only if the input really contained '__'.
    if ($double_underscore_replacements > 0) {
        $str = \str_replace('##', '__', $str);
    }

    // Characters kept by the pattern below:
    // - the hyphen (U+002D)
    // - 0-9 (U+0030 - U+0039)
    // - A-Z (U+0041 - U+005A)
    // - the underscore (U+005F)
    // - a-z (U+0061 - U+007A)
    // - ISO 10646 characters U+00A1 - U+FFFF
    // Every other character is stripped.
    $str = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $str);

    // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
    $str = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $str);

    return \trim($str, '-');
}
1136
|
|
|
|
1137
|
|
|
/**
 * Remove CSS media-queries (the "@media ... { ... }" blocks) from a string.
 *
 * @param string $str <p>The CSS input.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The input without the matched media-query blocks.</p>
 */
public static function css_stripe_media_queries(string $str): string
{
    $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

    $without_media_queries = \preg_replace($media_query_pattern, '', $str);

    return (string) $without_media_queries;
}
1154
|
|
|
|
1155
|
|
|
/**
 * Reports whether the "ctype" PHP extension is loaded.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>true</strong> if the extension is loaded, <strong>false</strong> otherwise</p>
 *
 * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
 */
public static function ctype_loaded(): bool
{
    return \extension_loaded('ctype');
}
1169
|
|
|
|
1170
|
|
|
/**
 * Converts a decimal code point into the corresponding UTF-8 character.
 *
 * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
 *
 * @param int|string $int <p>The decimal code point.</p>
 *
 * @phpstan-param int|numeric-string $int
 *
 * @psalm-pure
 *
 * @return string
 */
public static function decimal_to_chr($int): string
{
    // Build a numeric HTML entity ("&#<n>;") and let mbstring decode it.
    // We cannot use html_entity_decode() here, as it will not return
    // characters for many values < 160.
    $numeric_entity = '&#' . $int . ';';

    // NOTE(review): the "HTML-ENTITIES" pseudo-encoding is reported as deprecated
    // by newer PHP versions (8.2+) - confirm and plan a replacement.
    return \mb_convert_encoding($numeric_entity, 'UTF-8', 'HTML-ENTITIES');
}
1191
|
|
|
|
1192
|
|
|
/**
 * Decodes a MIME header field.
 *
 * @param string $str      <p>The encoded header field.</p>
 * @param string $encoding [optional] <p>The charset the decoded result is represented in.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>A decoded MIME field on success,
 *                      or false if an error occurs during the decoding.</p>
 */
public static function decode_mimeheader($str, string $encoding = 'UTF-8')
{
    // 'UTF-8' and 'CP850' are used as-is, everything else is normalized first
    $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
    if ($needs_normalization) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // always fallback via symfony polyfill
    return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
}
1213
|
|
|
|
1214
|
|
|
/**
 * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
 *
 * Each letter A-Z is mapped onto the matching "regional indicator symbol"
 * (starting at U+1F1E6 for "A"); the two symbols together form the flag.
 *
 * @see https://en.wikipedia.org/wiki/ISO_3166-1
 *
 * @param string $country_code_iso_3166_1 <p>e.g. DE (case-insensitive)</p>
 *
 * @return string
 *                <p>Emoji or empty string on error, i.e. whenever the input
 *                is not exactly two ASCII letters.</p>
 */
public static function emoji_from_country_code(string $country_code_iso_3166_1): string
{
    // Only two ASCII letters can be mapped. A plain character count is not enough here,
    // because the code below addresses single bytes: two multibyte characters (e.g. "ää")
    // or non-letters (e.g. "12") would otherwise produce garbage instead of ''.
    if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
        return '';
    }

    $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

    $flagOffset = 0x1F1E6; // REGIONAL INDICATOR SYMBOL LETTER A
    $asciiOffset = 0x41; // "A"

    // The input is validated ASCII, so a byte-wise ord() is exact here.
    return (self::chr(\ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset) ?? '') .
           (self::chr(\ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset) ?? '');
}
1242
|
|
|
|
1243
|
|
|
/**
 * Decodes a string which was encoded by "UTF8::emoji_encode()".
 *
 * INFO: opposite to UTF8::emoji_encode()
 *
 * EXAMPLE: <code>
 * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
 * //
 * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
 * </code>
 *
 * @param string $str                            <p>The input string.</p>
 * @param bool   $use_reversible_string_mappings [optional] <p>
 *                                               When <b>TRUE</b>, we use a reversible string mapping
 *                                               between "emoji_encode" and "emoji_decode".</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function emoji_decode(
    string $str,
    bool $use_reversible_string_mappings = false
): string {
    // lazy-load the emoji lookup tables on first use
    if (self::$EMOJI_KEYS_CACHE === null) {
        /** @phpstan-ignore-next-line - we need to load the data first */
        self::initEmojiData();
    }

    // pick the placeholder set that was used for encoding
    $placeholders = $use_reversible_string_mappings
        ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
        : self::$EMOJI_KEYS_CACHE;

    // placeholder => emoji
    return (string) \str_replace(
        (array) $placeholders,
        (array) self::$EMOJI_VALUES_CACHE,
        $str
    );
}
1286
|
|
|
|
1287
|
|
|
/**
 * Encode a string with emoji chars into a non-emoji string.
 *
 * INFO: opposite to UTF8::emoji_decode()
 *
 * EXAMPLE: <code>
 * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
 * //
 * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
 * </code>
 *
 * @param string $str                            <p>The input string</p>
 * @param bool   $use_reversible_string_mappings [optional] <p>
 *                                               when <b>TRUE</b>, we use a reversible string mapping
 *                                               between "emoji_encode" and "emoji_decode"</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function emoji_encode(
    string $str,
    bool $use_reversible_string_mappings = false
): string {
    // lazy-load the emoji lookup tables on first use
    if (self::$EMOJI_KEYS_CACHE === null) {
        /** @phpstan-ignore-next-line - we need to load the data first */
        self::initEmojiData();
    }

    // pick the placeholder set the emojis are replaced with
    $placeholders = $use_reversible_string_mappings
        ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
        : self::$EMOJI_KEYS_CACHE;

    // emoji => placeholder
    return (string) \str_replace(
        (array) self::$EMOJI_VALUES_CACHE,
        (array) $placeholders,
        $str
    );
}
1330
|
|
|
|
1331
|
|
|
/**
 * Encode a string with a new charset-encoding.
 *
 * INFO: This function will also try to fix broken / double encoding,
 *       so you can call this function also on a UTF-8 string and you don't mess up the string.
 *
 * EXAMPLE: <code>
 * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
 * //
 * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
 * //
 * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
 * //
 * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
 * </code>
 *
 * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
 * @param string $str                           <p>The input string</p>
 * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the current string-encoding is
 *                                              auto-detected and a detected encoding replaces
 *                                              $from_encoding; if nothing can be detected the string is
 *                                              passed through UTF8::to_utf8() and returned.<br>
 *                                              When false, $from_encoding is used as given.</p>
 * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
 *                                              An empty string will trigger the autodetect anyway.</p>
 *
 * @psalm-pure
 *
 * @throws \InvalidArgumentException if $to_encoding is 'JSON' and the string cannot be JSON-encoded
 *
 * @return string
 *
 * @psalm-suppress InvalidReturnStatement
 */
public static function encode(
    string $to_encoding,
    string $str,
    bool $auto_detect_the_from_encoding = true,
    string $from_encoding = ''
): string {
    // nothing to do
    if ($str === '' || $to_encoding === '') {
        return $str;
    }

    // normalize the encoding names ('UTF-8' and 'CP850' are used as-is)
    if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
        $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
    }

    if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
        $from_encoding = self::normalize_encoding($from_encoding);
    }

    // source and target are explicitly the same
    if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
        return $str;
    }

    // --- pseudo encodings: JSON ---
    if ($to_encoding === 'JSON') {
        $json = self::json_encode($str);
        if ($json === false) {
            throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
        }

        return $json;
    }
    if ($from_encoding === 'JSON') {
        $str = self::json_decode($str);
        $from_encoding = '';
    }

    // --- pseudo encodings: BASE64 ---
    if ($to_encoding === 'BASE64') {
        return \base64_encode($str);
    }
    if ($from_encoding === 'BASE64') {
        // NOTE(review): base64_decode() in strict mode returns false for invalid input
        // and that value is used unchecked below - confirm whether this needs handling.
        $str = \base64_decode($str, true);
        $from_encoding = '';
    }

    // --- pseudo encodings: HTML-ENTITIES ---
    if ($to_encoding === 'HTML-ENTITIES') {
        return self::html_encode($str, true);
    }
    if ($from_encoding === 'HTML-ENTITIES') {
        $str = self::html_entity_decode($str, \ENT_COMPAT);
        $from_encoding = '';
    }

    // detect the source encoding if requested, or if we do not know it (anymore)
    $detected_encoding = ($auto_detect_the_from_encoding || !$from_encoding)
        ? self::str_detect_encoding($str)
        : false;

    // DEBUG
    //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

    if ($detected_encoding !== false) {
        $from_encoding = $detected_encoding;
    } elseif ($auto_detect_the_from_encoding) {
        // fallback for the "autodetect"-mode
        return self::to_utf8($str);
    }

    if (!$from_encoding || $from_encoding === $to_encoding) {
        return $str;
    }

    // shortcuts via the class-internal converters
    if (
        $to_encoding === 'UTF-8'
        &&
        \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
    ) {
        return self::to_utf8($str);
    }

    if (
        $to_encoding === 'ISO-8859-1'
        &&
        \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
    ) {
        return self::to_iso8859($str);
    }

    // warn if the target encoding cannot be handled without mbstring
    if (
        !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
        $str_encoded = \mb_convert_encoding($str, $to_encoding, $from_encoding);

        if ($str_encoded) {
            \assert(\is_string($str_encoded));

            return $str_encoded;
        }
    }

    // last resort: iconv; on failure the input is returned as it is
    /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
    $iconv_result = @\iconv($from_encoding, $to_encoding, $str);

    return $iconv_result !== false ? $iconv_result : $str;
}
1506
|
|
|
|
1507
|
|
|
/**
 * Encodes a string as a MIME header field value via iconv_mime_encode()
 * (called with an empty field name).
 *
 * @param string $str               <p>The value to encode.</p>
 * @param string $from_charset      [optional] <p>Set the input charset.</p>
 * @param string $to_charset        [optional] <p>Set the output charset.</p>
 * @param string $transfer_encoding [optional] <p>Set the transfer encoding (iconv "scheme").</p>
 * @param string $linefeed          [optional] <p>Set the used linefeed (iconv "line-break-chars").</p>
 * @param int    $indent            [optional] <p>Set the max line length (iconv "line-length").</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>An encoded MIME field on success,
 *                      or false if an error occurs during the encoding.</p>
 */
public static function encode_mimeheader(
    string $str,
    string $from_charset = 'UTF-8',
    string $to_charset = 'UTF-8',
    string $transfer_encoding = 'Q',
    string $linefeed = "\r\n",
    int $indent = 76
) {
    // 'UTF-8' and 'CP850' are used as-is, everything else is normalized first
    if ($from_charset !== 'UTF-8' && $from_charset !== 'CP850') {
        $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
    }

    if ($to_charset !== 'UTF-8' && $to_charset !== 'CP850') {
        $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
    }

    $iconv_options = [
        'scheme'           => $transfer_encoding,
        'line-length'      => $indent,
        'input-charset'    => $from_charset,
        'output-charset'   => $to_charset,
        'line-break-chars' => $linefeed,
    ];

    // always fallback via symfony polyfill
    return \iconv_mime_encode('', $str, $iconv_options);
}
1550
|
|
|
|
1551
|
|
|
/**
 * Create an extract from a sentence, so if the search-string was found, it tries to center it in the output.
 *
 * The extract is cut at a space or a dot and skipped text is marked with
 * $replacer_for_skipped_text.
 *
 * @param string   $str                       <p>The input string.</p>
 * @param string   $search                    <p>The searched string.</p>
 * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
 * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
 * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function extract_text(
    string $str,
    string $search = '',
    int $length = null,
    string $replacer_for_skipped_text = '…',
    string $encoding = 'UTF-8'
): string {
    if ($str === '') {
        return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // for UTF-8 the native "mb_*" functions are called directly,
    // every other encoding goes through the class-internal wrappers
    $is_utf8 = $encoding === 'UTF-8';

    $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    if ($length === null) {
        $length = (int) \round((int) self::strlen($str, $encoding) / 2);
    }

    // NOTE(review): every "min(pos of ' ', pos of '.')" below receives false from a lookup
    // that found nothing; min() then yields false, which casts to 0 - so a cut position is
    // only found when BOTH characters occur after the offset. Confirm this is intended.

    // ---------------------------------------------------------------
    // no search-string: cut the text after (roughly) $length characters
    // ---------------------------------------------------------------
    if ($search === '') {
        if ($length > 0) {
            $string_length = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);
            $end = \min($length - 1, $string_length);
        } else {
            $end = 0;
        }

        $pos = $is_utf8
            ? (int) \min(
                \mb_strpos($str, ' ', $end),
                \mb_strpos($str, '.', $end)
            )
            : (int) \min(
                self::strpos($str, ' ', $end, $encoding),
                self::strpos($str, '.', $end, $encoding)
            );

        if (!$pos) {
            return $str;
        }

        $str_sub = $is_utf8
            ? \mb_substr($str, 0, $pos)
            : self::substr($str, 0, $pos, $encoding);

        if ($str_sub === false) {
            return '';
        }

        return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }

    // ---------------------------------------------------------------
    // with search-string: try to center the extract around the match
    // ---------------------------------------------------------------
    if ($is_utf8) {
        $word_position = (int) \mb_stripos($str, $search);
        $search_length = (int) \mb_strlen($search);
    } else {
        $word_position = (int) self::stripos($str, $search, 0, $encoding);
        $search_length = (int) self::strlen($search, $encoding);
    }
    $half_side = (int) ($word_position - $length / 2 + $search_length / 2);

    // start of the extract: the last space / dot in front of the centered window
    $pos_start = 0;
    if ($half_side > 0) {
        $half_text = $is_utf8
            ? \mb_substr($str, 0, $half_side)
            : self::substr($str, 0, $half_side, $encoding);

        if ($half_text !== false) {
            $pos_start = $is_utf8
                ? (int) \max(
                    \mb_strrpos($half_text, ' '),
                    \mb_strrpos($half_text, '.')
                )
                : (int) \max(
                    self::strrpos($half_text, ' ', 0, $encoding),
                    self::strrpos($half_text, '.', 0, $encoding)
                );
        }
    }

    if ($word_position && $half_side > 0) {
        // the match is far enough inside the text: skip text in front of the extract
        $offset = \min($pos_start + $length - 1, (int) self::strlen($str, $encoding));

        $pos_end = $is_utf8
            ? (int) \min(
                \mb_strpos($str, ' ', $offset),
                \mb_strpos($str, '.', $offset)
            ) - $pos_start
            : (int) \min(
                self::strpos($str, ' ', $offset, $encoding),
                self::strpos($str, '.', $offset, $encoding)
            ) - $pos_start;

        if ($pos_end <= 0) {
            // no usable end position: take everything from the start position on
            $str_sub = $is_utf8
                ? \mb_substr($str, $pos_start, (int) \mb_strlen($str))
                : self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);

            if ($str_sub === false) {
                return '';
            }

            return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
        }

        $str_sub = $is_utf8
            ? \mb_substr($str, $pos_start, $pos_end)
            : self::substr($str, $pos_start, $pos_end, $encoding);

        if ($str_sub === false) {
            return '';
        }

        return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }

    // the match is missing or too close to the beginning: cut from the start of the text
    $offset = \min($length - 1, (int) self::strlen($str, $encoding));

    $pos_end = $is_utf8
        ? (int) \min(
            \mb_strpos($str, ' ', $offset),
            \mb_strpos($str, '.', $offset)
        )
        : (int) \min(
            self::strpos($str, ' ', $offset, $encoding),
            self::strpos($str, '.', $offset, $encoding)
        );

    if (!$pos_end) {
        return $str;
    }

    $str_sub = $is_utf8
        ? \mb_substr($str, 0, $pos_end)
        : self::substr($str, 0, $pos_end, $encoding);

    if ($str_sub === false) {
        return '';
    }

    return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
}
1740
|
|
|
|
1741
|
|
|
/** |
1742
|
|
|
* Reads entire file into a string. |
1743
|
|
|
* |
1744
|
|
|
* EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code> |
1745
|
|
|
* |
1746
|
|
|
* WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!! |
1747
|
|
|
* |
1748
|
|
|
* @see http://php.net/manual/en/function.file-get-contents.php |
1749
|
|
|
* |
1750
|
|
|
* @param string $filename <p> |
1751
|
|
|
* Name of the file to read. |
1752
|
|
|
* </p> |
1753
|
|
|
* @param bool $use_include_path [optional] <p> |
1754
|
|
|
* Prior to PHP 5, this parameter is called |
1755
|
|
|
* use_include_path and is a bool. |
1756
|
|
|
* As of PHP 5 the FILE_USE_INCLUDE_PATH can be used |
1757
|
|
|
* to trigger include path |
1758
|
|
|
* search. |
1759
|
|
|
* </p> |
1760
|
|
|
* @param resource|null $context [optional] <p> |
1761
|
|
|
* A valid context resource created with |
1762
|
|
|
* stream_context_create. If you don't need to use a |
1763
|
|
|
* custom context, you can skip this parameter by &null;. |
1764
|
|
|
* </p> |
1765
|
|
|
* @param int|null $offset [optional] <p> |
1766
|
|
|
* The offset where the reading starts. |
1767
|
|
|
* </p> |
1768
|
|
|
* @param int|null $max_length [optional] <p> |
1769
|
|
|
* Maximum length of data read. The default is to read until end |
1770
|
|
|
* of file is reached. |
1771
|
|
|
* </p> |
1772
|
|
|
* @param int $timeout <p>The time in seconds for the timeout.</p> |
1773
|
|
|
* @param bool $convert_to_utf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for |
1774
|
|
|
* some files, because they used non default utf-8 chars. Binary files |
1775
|
|
|
* like images or pdf will not be converted.</p> |
1776
|
|
|
* @param string $from_encoding [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br> |
1777
|
|
|
* A empty string will trigger the autodetect anyway.</p> |
1778
|
|
|
* |
1779
|
|
|
* @psalm-pure |
1780
|
|
|
* |
1781
|
|
|
* @return false|string |
1782
|
|
|
* <p>The function returns the read data as string or <b>false</b> on failure.</p> |
1783
|
|
|
*/ |
1784
|
12 |
|
public static function file_get_contents(
    string $filename,
    bool $use_include_path = false,
    $context = null,
    int $offset = null,
    int $max_length = null,
    int $timeout = 10,
    bool $convert_to_utf8 = true,
    string $from_encoding = ''
) {
    // Sanitize the file name first; a failed sanitization is reported like a failed read.
    /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
    $filename = Bootup::filter_sanitize_string_polyfill($filename);
    if ($filename === false) {
        return false;
    }

    // Build a default stream context carrying the timeout, but only when the
    // caller did not supply a context and a non-zero timeout was requested.
    if ($context === null && $timeout) {
        $context = \stream_context_create(
            [
                'http' => [
                    'timeout' => $timeout,
                ],
            ]
        );
    }

    $offset = $offset ?? 0;

    if ($max_length === null) {
        // No length limit: read until the end of the file.
        $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    } else {
        // Negative lengths are clamped to zero before they reach the native function.
        $data = \file_get_contents($filename, $use_include_path, $context, $offset, \max(0, $max_length));
    }

    // return false on error
    if ($data === false) {
        return false;
    }

    if (!$convert_to_utf8) {
        return $data;
    }

    // Convert when the content is not detected as binary, or when it is
    // detected as UTF-16 / UTF-32.
    $needs_conversion = !self::is_binary($data, true)
                        || self::is_utf16($data, false) !== false
                        || self::is_utf32($data, false) !== false;

    if ($needs_conversion) {
        $data = self::encode('UTF-8', $data, false, $from_encoding);
        $data = self::cleanup($data);
    }

    return $data;
}
1846
|
|
|
|
1847
|
|
|
/** |
1848
|
|
|
* Checks if a file starts with BOM (Byte Order Mark) character. |
1849
|
|
|
* |
1850
|
|
|
* EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code> |
1851
|
|
|
* |
1852
|
|
|
* @param string $file_path <p>Path to a valid file.</p> |
1853
|
|
|
* |
1854
|
|
|
* @throws \RuntimeException if file_get_contents() returned false |
1855
|
|
|
* |
1856
|
|
|
* @return bool |
1857
|
|
|
* <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p> |
1858
|
|
|
* |
1859
|
|
|
* @psalm-pure |
1860
|
|
|
*/ |
1861
|
2 |
|
public static function file_has_bom(string $file_path): bool
{
    $contents = \file_get_contents($file_path);

    if ($contents !== false) {
        // The BOM detection itself is delegated to the string based check.
        return self::string_has_bom($contents);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
}
1870
|
|
|
|
1871
|
|
|
/** |
1872
|
|
|
* Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
1873
|
|
|
* |
1874
|
|
|
* EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code> |
1875
|
|
|
* |
1876
|
|
|
* @param array|object|string $var |
1877
|
|
|
* @param int $normalization_form |
1878
|
|
|
* @param string $leading_combining |
1879
|
|
|
* |
1880
|
|
|
* @psalm-pure |
1881
|
|
|
* |
1882
|
|
|
* @return mixed |
1883
|
|
|
* |
1884
|
|
|
* @template TFilter |
1885
|
|
|
* @phpstan-param TFilter $var |
1886
|
|
|
* @phpstan-return TFilter |
1887
|
|
|
*/ |
1888
|
64 |
|
public static function filter(
    $var,
    int $normalization_form = \Normalizer::NFC,
    string $leading_combining = '◌'
) {
    $var_type = \gettype($var);

    if ($var_type === 'object' || $var_type === 'array') {
        // Recurse into every element and write the filtered value back in place.
        foreach ($var as &$element) {
            $element = self::filter($element, $normalization_form, $leading_combining);
        }
        unset($element);
    } elseif ($var_type === 'string') {
        if (\strpos($var, "\r") !== false) {
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII strings need no normalization at all.
        if (!ASCII::is_ascii($var)) {
            // '-' is a truthy, non-empty marker for "the input was already normalized".
            $normalized = '-';

            if (!\Normalizer::isNormalized($var, $normalization_form)) {
                $normalized = \Normalizer::normalize($var, $normalization_form);

                // Use the normalized string when there is one,
                // otherwise fall back to re-encoding the input as UTF-8.
                $var = ($normalized && isset($normalized[0]))
                    ? $normalized
                    : self::encode('UTF-8', $var);
            }

            \assert(\is_string($var));

            // Prevent leading combining chars
            // for NFC-safe concatenations.
            if (
                $normalized
                &&
                $var[0] >= "\x80"
                &&
                isset($normalized[0], $leading_combining[0])
                &&
                \preg_match('/^\\p{Mn}/u', $var)
            ) {
                $var = $leading_combining . $var;
            }
        }
    }
    // every other type is returned untouched

    /** @noinspection PhpSillyAssignmentInspection */
    /** @phpstan-var TFilter $var */
    $var = $var;

    return $var;
}
1948
|
|
|
|
1949
|
|
|
/** |
1950
|
|
|
* "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
1951
|
|
|
* |
1952
|
|
|
* Gets a specific external variable by name and optionally filters it. |
1953
|
|
|
* |
1954
|
|
|
* EXAMPLE: <code> |
1955
|
|
|
* // _GET['foo'] = 'bar'; |
1956
|
|
|
* UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW)); // 'bar' |
1957
|
|
|
* </code> |
1958
|
|
|
* |
1959
|
|
|
* @see http://php.net/manual/en/function.filter-input.php |
1960
|
|
|
* |
1961
|
|
|
* @param int $type <p> |
1962
|
|
|
* One of <b>INPUT_GET</b>, <b>INPUT_POST</b>, |
1963
|
|
|
* <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or |
1964
|
|
|
* <b>INPUT_ENV</b>. |
1965
|
|
|
* </p> |
1966
|
|
|
* @param string $variable_name <p> |
1967
|
|
|
* Name of a variable to get. |
1968
|
|
|
* </p> |
1969
|
|
|
* @param int $filter [optional] <p> |
1970
|
|
|
* The ID of the filter to apply. The |
1971
|
|
|
* manual page lists the available filters. |
1972
|
|
|
* </p> |
1973
|
|
|
* @param int|int[]|null $options [optional] <p> |
1974
|
|
|
* Associative array of options or bitwise disjunction of flags. If filter |
1975
|
|
|
* accepts options, flags can be provided in "flags" field of array. |
1976
|
|
|
* </p> |
1977
|
|
|
* |
1978
|
|
|
* @psalm-pure |
1979
|
|
|
* |
1980
|
|
|
* @return mixed |
1981
|
|
|
* <p> |
1982
|
|
|
* Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the |
1983
|
|
|
* <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it |
1984
|
|
|
* returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails. |
1985
|
|
|
* </p> |
1986
|
|
|
*/ |
1987
|
1 |
|
public static function filter_input(
    int $type,
    string $variable_name,
    int $filter = \FILTER_DEFAULT,
    $options = null
) {
    /**
     * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
     */
    $forward_options = $options !== null && \func_num_args() >= 4;

    // Only hand the options over to the native function when the caller really provided them.
    $var = $forward_options
        ? \filter_input($type, $variable_name, $filter, $options)
        : \filter_input($type, $variable_name, $filter);

    return self::filter($var);
}
2004
|
|
|
|
2005
|
|
|
/** |
2006
|
|
|
* "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
2007
|
|
|
* |
2008
|
|
|
* Gets external variables and optionally filters them. |
2009
|
|
|
* |
2010
|
|
|
* EXAMPLE: <code> |
2011
|
|
|
* // _GET['foo'] = 'bar'; |
2012
|
|
|
* UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_UNSAFE_RAW')); // array('bar') |
2013
|
|
|
* </code> |
2014
|
|
|
* |
2015
|
|
|
* @see http://php.net/manual/en/function.filter-input-array.php |
2016
|
|
|
* |
2017
|
|
|
* @param int $type <p> |
2018
|
|
|
* One of <b>INPUT_GET</b>, <b>INPUT_POST</b>, |
2019
|
|
|
* <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or |
2020
|
|
|
* <b>INPUT_ENV</b>. |
2021
|
|
|
* </p> |
2022
|
|
|
* @param array|null $definition [optional] <p> |
2023
|
|
|
* An array defining the arguments. A valid key is a string |
2024
|
|
|
* containing a variable name and a valid value is either a filter type, or an array |
2025
|
|
|
* optionally specifying the filter, flags and options. If the value is an |
2026
|
|
|
* array, valid keys are filter which specifies the |
2027
|
|
|
* filter type, |
2028
|
|
|
* flags which specifies any flags that apply to the |
2029
|
|
|
* filter, and options which specifies any options that |
2030
|
|
|
* apply to the filter. See the example below for a better understanding. |
2031
|
|
|
* </p> |
2032
|
|
|
* <p> |
2033
|
|
|
* This parameter can be also an integer holding a filter constant. Then all values in the |
2034
|
|
|
* input array are filtered by this filter. |
2035
|
|
|
* </p> |
2036
|
|
|
* @param bool $add_empty [optional] <p> |
2037
|
|
|
* Add missing keys as <b>NULL</b> to the return value. |
2038
|
|
|
* </p> |
2039
|
|
|
* |
2040
|
|
|
* @psalm-pure |
2041
|
|
|
* |
2042
|
|
|
* @return mixed |
2043
|
|
|
* <p> |
2044
|
|
|
* An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. |
2045
|
|
|
* An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not |
2046
|
|
|
* set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable |
2047
|
|
|
* is not set and <b>NULL</b> if the filter fails. |
2048
|
|
|
* </p> |
2049
|
|
|
*/ |
2050
|
1 |
|
public static function filter_input_array(
    int $type,
    $definition = null,
    bool $add_empty = true
) {
    /**
     * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
     */
    $forward_definition = $definition !== null && \func_num_args() >= 2;

    // Without a definition the native function is called with the input type only.
    $values = $forward_definition
        ? \filter_input_array($type, $definition, $add_empty)
        : \filter_input_array($type);

    return self::filter($values);
}
2066
|
|
|
|
2067
|
|
|
/** |
2068
|
|
|
* "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
2069
|
|
|
* |
2070
|
|
|
* Filters a variable with a specified filter. |
2071
|
|
|
* |
2072
|
|
|
* EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code> |
2073
|
|
|
* |
2074
|
|
|
* @see http://php.net/manual/en/function.filter-var.php |
2075
|
|
|
* |
2076
|
|
|
* @param float|int|string|null $variable <p> |
2077
|
|
|
* Value to filter. |
2078
|
|
|
* </p> |
2079
|
|
|
* @param int $filter [optional] <p> |
2080
|
|
|
* The ID of the filter to apply. The |
2081
|
|
|
* manual page lists the available filters. |
2082
|
|
|
* </p> |
2083
|
|
|
* @param int|int[]|null $options [optional] <p> |
2084
|
|
|
* Associative array of options or bitwise disjunction of flags. If filter |
2085
|
|
|
* accepts options, flags can be provided in "flags" field of array. For |
2086
|
|
|
* the "callback" filter, callable type should be passed. The |
2087
|
|
|
* callback must accept one argument, the value to be filtered, and return |
2088
|
|
|
* the value after filtering/sanitizing it. |
2089
|
|
|
* </p> |
2090
|
|
|
* <p> |
2091
|
|
|
* <code> |
2092
|
|
|
* // for filters that accept options, use this format |
2093
|
|
|
* $options = array( |
2094
|
|
|
* 'options' => array( |
2095
|
|
|
* 'default' => 3, // value to return if the filter fails |
2096
|
|
|
* // other options here |
2097
|
|
|
* 'min_range' => 0 |
2098
|
|
|
* ), |
2099
|
|
|
* 'flags' => FILTER_FLAG_ALLOW_OCTAL, |
2100
|
|
|
* ); |
2101
|
|
|
* $var = filter_var('0755', FILTER_VALIDATE_INT, $options); |
2102
|
|
|
* // for filter that only accept flags, you can pass them directly |
2103
|
|
|
* $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE); |
2104
|
|
|
* // for filter that only accept flags, you can also pass as an array |
2105
|
|
|
* $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, |
2106
|
|
|
* array('flags' => FILTER_NULL_ON_FAILURE)); |
2107
|
|
|
* // callback validate filter |
2108
|
|
|
* function foo($value) |
2109
|
|
|
* { |
2110
|
|
|
* // Expected format: Surname, GivenNames |
2111
|
|
|
* if (strpos($value, ", ") === false) return false; |
2112
|
|
|
* list($surname, $givennames) = explode(", ", $value, 2); |
2113
|
|
|
* $empty = (empty($surname) || empty($givennames)); |
2114
|
|
|
* $notstrings = (!is_string($surname) || !is_string($givennames)); |
2115
|
|
|
* if ($empty || $notstrings) { |
2116
|
|
|
* return false; |
2117
|
|
|
* } else { |
2118
|
|
|
* return $value; |
2119
|
|
|
* } |
2120
|
|
|
* } |
2121
|
|
|
* $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo')); |
2122
|
|
|
* </code> |
2123
|
|
|
* </p> |
2124
|
|
|
* |
2125
|
|
|
* @psalm-pure |
2126
|
|
|
* |
2127
|
|
|
* @return mixed |
2128
|
|
|
* <p>The filtered data, or <b>FALSE</b> if the filter fails.</p> |
2129
|
|
|
*/ |
2130
|
2 |
|
public static function filter_var(
    $variable,
    int $filter = \FILTER_DEFAULT,
    $options = null
) {
    /**
     * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
     */
    if ($options === null || \func_num_args() < 3) {
        // An explicit "null" is treated exactly like an omitted argument (same as
        // in "filter_input()"): the native function does not accept "null" for
        // its "$options" parameter when strict types are enabled.
        $variable = \filter_var($variable, $filter);
    } else {
        $variable = \filter_var($variable, $filter, $options);
    }

    // Normalize the filtered result (string, array, ...) to UTF-8.
    return self::filter($variable);
}
2146
|
|
|
|
2147
|
|
|
/** |
2148
|
|
|
* "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
2149
|
|
|
* |
2150
|
|
|
* Gets multiple variables and optionally filters them. |
2151
|
|
|
* |
2152
|
|
|
* EXAMPLE: <code> |
2153
|
|
|
* $filters = [ |
2154
|
|
|
* 'name' => ['filter' => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']], |
2155
|
|
|
* 'age' => ['filter' => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]], |
2156
|
|
|
* 'email' => FILTER_VALIDATE_EMAIL, |
2157
|
|
|
* ]; |
2158
|
|
|
* |
2159
|
|
|
* $data = [ |
2160
|
|
|
* 'name' => 'κόσμε', |
2161
|
|
|
* 'age' => '18', |
2162
|
|
|
* 'email' => '[email protected]' |
2163
|
|
|
* ]; |
2164
|
|
|
* |
2165
|
|
|
* UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => '[email protected]'] |
2166
|
|
|
* </code> |
2167
|
|
|
* |
2168
|
|
|
* @see http://php.net/manual/en/function.filter-var-array.php |
2169
|
|
|
* |
2170
|
|
|
* @param array<mixed> $data <p> |
2171
|
|
|
* An array with string keys containing the data to filter. |
2172
|
|
|
* </p> |
2173
|
|
|
* @param array|int|null $definition [optional] <p> |
2174
|
|
|
* An array defining the arguments. A valid key is a string |
2175
|
|
|
* containing a variable name and a valid value is either a |
2176
|
|
|
* filter type, or an |
2177
|
|
|
* array optionally specifying the filter, flags and options. |
2178
|
|
|
* If the value is an array, valid keys are filter |
2179
|
|
|
* which specifies the filter type, |
2180
|
|
|
* flags which specifies any flags that apply to the |
2181
|
|
|
* filter, and options which specifies any options that |
2182
|
|
|
* apply to the filter. See the example below for a better understanding. |
2183
|
|
|
* </p> |
2184
|
|
|
* <p> |
2185
|
|
|
* This parameter can be also an integer holding a filter constant. Then all values |
2186
|
|
|
* in the input array are filtered by this filter. |
2187
|
|
|
* </p> |
2188
|
|
|
* @param bool $add_empty [optional] <p> |
2189
|
|
|
* Add missing keys as <b>NULL</b> to the return value. |
2190
|
|
|
* </p> |
2191
|
|
|
* |
2192
|
|
|
* @psalm-pure |
2193
|
|
|
* |
2194
|
|
|
* @return mixed |
2195
|
|
|
* <p> |
2196
|
|
|
* An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. |
2197
|
|
|
* An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not |
2198
|
|
|
* set. |
2199
|
|
|
* </p> |
2200
|
|
|
*/ |
2201
|
2 |
|
public static function filter_var_array(
    array $data,
    $definition = null,
    bool $add_empty = true
) {
    /**
     * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
     */
    if ($definition === null || \func_num_args() < 2) {
        // An explicit "null" is treated exactly like an omitted argument (same as
        // in "filter_input_array()"): the native function does not accept "null"
        // for its definition parameter when strict types are enabled.
        $a = \filter_var_array($data);
    } else {
        $a = \filter_var_array($data, $definition, $add_empty);
    }

    // Normalize every filtered value to UTF-8.
    return self::filter($a);
}
2217
|
|
|
|
2218
|
|
|
/**
 * Checks whether finfo is available on the server.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
 *
 * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
 */
public static function finfo_loaded(): bool
{
    // The check is based solely on the existence of the "finfo" class.
    $finfo_available = \class_exists('finfo');

    return $finfo_available;
}
2232
|
|
|
|
2233
|
|
|
/** |
2234
|
|
|
* Returns the first $n characters of the string. |
2235
|
|
|
* |
2236
|
|
|
* @param string $str <p>The input string.</p> |
2237
|
|
|
* @param int $n <p>Number of characters to retrieve from the start.</p> |
2238
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
2239
|
|
|
* |
2240
|
|
|
* @psalm-pure |
2241
|
|
|
* |
2242
|
|
|
* @return string |
2243
|
|
|
*/ |
2244
|
13 |
|
public static function first_char(
    string $str,
    int $n = 1,
    string $encoding = 'UTF-8'
): string {
    // Nothing to extract from an empty string or for a non-positive count.
    if ($n <= 0 || $str === '') {
        return '';
    }

    // UTF-8 goes straight to "mb_substr()", every other encoding
    // is handled by the encoding-aware wrapper.
    return $encoding === 'UTF-8'
        ? (string) \mb_substr($str, 0, $n)
        : (string) self::substr($str, 0, $n, $encoding);
}
2259
|
|
|
|
2260
|
|
|
/**
 * Check if the number of Unicode characters isn't greater than the specified integer.
 *
 * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 4); // false</code>
 *
 * @param string $str      the original string to be checked
 * @param int    $box_size the size in number of chars to be checked against string
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
 */
public static function fits_inside(string $str, int $box_size): bool
{
    // The length is cast to int first, so a failed length lookup counts as 0.
    $char_count = (int) self::strlen($str);

    return $char_count <= $box_size;
}
2277
|
|
|
|
2278
|
|
|
/** |
2279
|
|
|
* Try to fix simple broken UTF-8 strings. |
2280
|
|
|
* |
2281
|
|
|
* INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings. |
2282
|
|
|
* |
2283
|
|
|
* EXAMPLE: <code>UTF8::fix_simple_utf8('Düsseldorf'); // 'Düsseldorf'</code> |
2284
|
|
|
* |
2285
|
|
|
* If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1 |
2286
|
|
|
* (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it. |
2287
|
|
|
* See: http://en.wikipedia.org/wiki/Windows-1252 |
2288
|
|
|
* |
2289
|
|
|
* @param string $str <p>The input string</p> |
2290
|
|
|
* |
2291
|
|
|
* @psalm-pure |
2292
|
|
|
* |
2293
|
|
|
* @return string |
2294
|
|
|
*/ |
2295
|
46 |
|
public static function fix_simple_utf8(string $str): string
{
    if ($str === '') {
        return '';
    }

    /**
     * Search strings (broken byte sequences), built once per request.
     *
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<mixed>|null
     */
    static $SEARCH_CACHE = null;

    /**
     * Replacement strings, in the same order as the search strings.
     *
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<mixed>|null
     */
    static $REPLACE_CACHE = null;

    if ($SEARCH_CACHE === null) {
        // Lazy-load the replacement table on first use.
        self::$BROKEN_UTF8_FIX = self::$BROKEN_UTF8_FIX ?? self::getData('utf8_fix');

        $SEARCH_CACHE = \array_keys(self::$BROKEN_UTF8_FIX ?: []);
        $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
    }

    \assert(\is_array($REPLACE_CACHE));

    return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
}
2328
|
|
|
|
2329
|
|
|
/** |
2330
|
|
|
* Fix a double (or multiple) encoded UTF8 string. |
2331
|
|
|
* |
2332
|
|
|
* EXAMPLE: <code>UTF8::fix_utf8('Fédération'); // 'Fédération'</code> |
2333
|
|
|
* |
2334
|
|
|
* @param string|string[] $str you can use a string or an array of strings |
2335
|
|
|
* |
2336
|
|
|
* @psalm-pure |
2337
|
|
|
* |
2338
|
|
|
* @return string|string[] |
2339
|
|
|
* <p>Will return the fixed input-"array" or |
2340
|
|
|
* the fixed input-"string".</p> |
2341
|
|
|
* |
2342
|
|
|
* @template TFixUtf8 |
2343
|
|
|
* @phpstan-param TFixUtf8 $str |
2344
|
|
|
* @phpstan-return TFixUtf8 |
2345
|
|
|
*/ |
2346
|
2 |
|
public static function fix_utf8($str)
{
    if (\is_array($str)) {
        // Fix every element on its own; the keys are kept as they are.
        foreach ($str as $key => $value) {
            $str[$key] = self::fix_utf8($value);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }

    $str = (string) $str;

    // Decode and re-encode until the string no longer changes, so that
    // double (or multiple) encoded input is unwrapped layer by layer.
    $previous = '';
    while ($previous !== $str) {
        $previous = $str;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $str = self::to_utf8(
            self::utf8_decode($str, true)
        );
    }

    /**
     * @psalm-suppress InvalidReturnStatement
     */
    return $str;
}
2377
|
|
|
|
2378
|
|
|
/**
 * Get the text direction of a specific character.
 *
 * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
 *
 * @param string $char <p>The character to inspect.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>'RTL' or 'LTR'.</p>
 */
public static function getCharDirection(string $char): string
{
    if (self::$SUPPORT['intlChar'] === true) {
        $intl_direction = \IntlChar::charDirection($char);

        // Direction codes from the "IntlChar"-Class. Every other code falls
        // through to the code point based lookup below.
        if (\in_array($intl_direction, [0, 11, 12, 20], true)) {
            return 'LTR';
        }

        if (\in_array($intl_direction, [1, 13, 14, 15, 21], true)) {
            return 'RTL';
        }
    }

    $code_point = self::chr_to_decimal($char);

    // Everything outside of 0x5be - 0x10b7f is treated as left-to-right.
    if ($code_point < 0x5be || $code_point > 0x10b7f) {
        return 'LTR';
    }

    if ($code_point <= 0x85e) {
        $rtl_points = [
            0x5be, 0x5c0, 0x5c3, 0x5c6,
            0x608, 0x60b, 0x60d, 0x61b,
            0x710, 0x7b1, 0x7fa,
            0x81a, 0x824, 0x828, 0x85e,
        ];
        $rtl_ranges = [
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x800, 0x815],
            [0x830, 0x83e],
            [0x840, 0x858],
        ];
    } elseif ($code_point === 0x200f) {
        return 'RTL';
    } elseif ($code_point >= 0xfb1d) {
        $rtl_points = [
            0xfb1d, 0xfb3e,
            0x10808, 0x1083c, 0x1093f, 0x10a00,
        ];
        $rtl_ranges = [
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            // upper bound comes from the 0x10b7f range check above
            [0x10b78, 0x10b7f],
        ];
    } else {
        // Between 0x85e and 0xfb1d only 0x200f counts as right-to-left.
        return 'LTR';
    }

    if (\in_array($code_point, $rtl_points, true)) {
        return 'RTL';
    }

    foreach ($rtl_ranges as $range) {
        if ($code_point >= $range[0] && $code_point <= $range[1]) {
            return 'RTL';
        }
    }

    return 'LTR';
}
2495
|
|
|
|
2496
|
|
|
/** |
2497
|
|
|
* Check for php-support. |
2498
|
|
|
* |
2499
|
|
|
* @param string|null $key |
2500
|
|
|
* |
2501
|
|
|
* @psalm-pure |
2502
|
|
|
* |
2503
|
|
|
* @return mixed |
2504
|
|
|
* Return the full support-"array", if $key === null<br> |
2505
|
|
|
* return bool-value, if $key is used and available<br> |
2506
|
|
|
* otherwise return <strong>null</strong> |
2507
|
|
|
*/ |
2508
|
27 |
|
public static function getSupportInfo(string $key = null)
{
    if ($key !== null) {
        // Lazy-load the transliterator list on first use.
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // compatibility fix for old versions
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        // Unknown keys yield "null".
        return self::$SUPPORT[$key] ?? null;
    }

    // Without a key the full support-"array" is returned.
    return self::$SUPPORT;
}
2522
|
|
|
|
2523
|
|
|
/**
 * Detects the file type from the first two bytes of the given content.
 *
 * Warning: this method only works for some file-types (png, jpg)
 *          if you need more supported types, please use e.g. "finfo"
 *
 * @param string $str
 * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
 *
 * @psalm-pure
 *
 * @return null[]|string[]
 *                         <p>with this keys: 'ext', 'mime', 'type'</p>
 *
 * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
 */
public static function get_file_type(
    string $str,
    array $fallback = [
        'ext'  => null,
        'mime' => 'application/octet-stream',
        'type' => null,
    ]
): array {
    if ($str === '') {
        return $fallback;
    }

    /** @var false|string $magic_bytes - needed for PhpStan (stubs error) */
    $magic_bytes = \substr($str, 0, 2);
    if ($magic_bytes === false || \strlen($magic_bytes) !== 2) {
        return $fallback;
    }

    $bytes = \unpack('C2chars', $magic_bytes);
    if ($bytes === false) {
        return $fallback;
    }

    // The decimal values of the first two bytes, written one after the other.
    $type_code = (int) ($bytes['chars1'] . $bytes['chars2']);

    //
    // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
    //
    // WARNING: do not add too simple comparisons, because of false-positive results:
    //
    // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
    // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
    //
    $known_types = [
        255216 => [
            'ext'  => 'jpg',
            'mime' => 'image/jpeg',
            'type' => 'binary',
        ],
        13780 => [
            'ext'  => 'png',
            'mime' => 'image/png',
            'type' => 'binary',
        ],
    ];

    return $known_types[$type_code] ?? $fallback;
}
2599
|
|
|
|
2600
|
|
|
/**
 * Builds a random string by picking characters from the given character pool.
 *
 * @param int    $length         <p>Length of the random string.</p>
 * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
 * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 */
public static function get_random_string(
    int $length,
    string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
    string $encoding = 'UTF-8'
): string {
    // UTF-8 uses the "mb_" functions directly, every other
    // encoding goes through the encoding-aware wrappers.
    $is_utf8 = $encoding === 'UTF-8';

    if ($is_utf8) {
        $pool_size = (int) \mb_strlen($possible_chars);
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $pool_size = (int) self::strlen($possible_chars, $encoding);
    }

    // An empty pool can never produce any character.
    if ($pool_size === 0) {
        return '';
    }

    $result = '';
    $picked = 0;

    //
    // add random chars
    //
    while ($picked < $length) {
        try {
            $index = \random_int(0, $pool_size - 1);
        } catch (\Exception $e) {
            // fall back to "mt_rand()" when "random_int()" throws
            $index = \mt_rand(0, $pool_size - 1);
        }

        $char = $is_utf8
            ? \mb_substr($possible_chars, $index, 1)
            : self::substr($possible_chars, $index, 1, $encoding);

        // Only a successful extraction counts towards the requested length.
        if ($char !== false) {
            $result .= $char;
            ++$picked;
        }
    }

    return $result;
}
2662
|
|
|
|
2663
|
|
|
/** |
2664
|
|
|
* @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p> |
2665
|
|
|
* @param bool $use_md5 [optional] <p>Return the unique identifier as md5-hash? Default: true</p> |
2666
|
|
|
* |
2667
|
|
|
* @return string |
2668
|
|
|
*/ |
2669
|
1 |
|
public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
{
    // Prefer random_int(); fall back to mt_rand() when it throws.
    try {
        $random_number = \random_int(0, \mt_getrandmax());
    } catch (\Exception $e) {
        $random_number = \mt_rand(0, \mt_getrandmax());
    }

    // Prefix for uniqid(): random number + session id + client / server address + caller supplied entropy.
    $prefix = \implode(
        '',
        [
            $random_number,
            \session_id(),
            $_SERVER['REMOTE_ADDR'] ?? '',
            $_SERVER['SERVER_ADDR'] ?? '',
            $extra_entropy,
        ]
    );

    $unique = \uniqid($prefix, true);

    // Optionally collapse everything into a fixed-length md5 hex digest.
    return $use_md5 ? \md5($unique . $prefix) : $unique;
}
2691
|
|
|
|
2692
|
|
|
/** |
2693
|
|
|
* Returns true if the string contains a lower case char, false otherwise. |
2694
|
|
|
* |
2695
|
|
|
* @param string $str <p>The input string.</p> |
2696
|
|
|
* |
2697
|
|
|
* @psalm-pure |
2698
|
|
|
* |
2699
|
|
|
* @return bool |
2700
|
|
|
* <p>Whether or not the string contains a lower case character.</p> |
2701
|
|
|
*/ |
2702
|
47 |
|
public static function has_lowercase(string $str): bool
{
    // any run of characters followed by one lower case character
    $pattern = '.*[[:lower:]]';

    // use the mbstring regex engine when available, otherwise the class-internal matcher
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
2710
|
|
|
|
2711
|
|
|
/** |
2712
|
|
|
* Returns true if the string contains whitespace, false otherwise. |
2713
|
|
|
* |
2714
|
|
|
* @param string $str <p>The input string.</p> |
2715
|
|
|
* |
2716
|
|
|
* @psalm-pure |
2717
|
|
|
* |
2718
|
|
|
* @return bool |
2719
|
|
|
* <p>Whether or not the string contains whitespace.</p> |
2720
|
|
|
*/ |
2721
|
11 |
|
public static function has_whitespace(string $str): bool
{
    // any run of characters followed by one whitespace character
    $pattern = '.*[[:space:]]';

    // use the mbstring regex engine when available, otherwise the class-internal matcher
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
2729
|
|
|
|
2730
|
|
|
/** |
2731
|
|
|
* Returns true if the string contains an upper case char, false otherwise. |
2732
|
|
|
* |
2733
|
|
|
* @param string $str <p>The input string.</p> |
2734
|
|
|
* |
2735
|
|
|
* @psalm-pure |
2736
|
|
|
* |
2737
|
|
|
* @return bool |
2738
|
|
|
* <p>Whether or not the string contains an upper case character.</p> |
2739
|
|
|
*/ |
2740
|
12 |
|
public static function has_uppercase(string $str): bool
{
    // any run of characters followed by one upper case character
    $pattern = '.*[[:upper:]]';

    // use the mbstring regex engine when available, otherwise the class-internal matcher
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
2748
|
|
|
|
2749
|
|
|
/** |
2750
|
|
|
* Converts a hexadecimal value into a UTF-8 character. |
2751
|
|
|
* |
2752
|
|
|
* INFO: opposite to UTF8::chr_to_hex() |
2753
|
|
|
* |
2754
|
|
|
* EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code> |
2755
|
|
|
* |
2756
|
|
|
* @param string $hexdec <p>The hexadecimal value.</p> |
2757
|
|
|
* |
2758
|
|
|
* @psalm-pure |
2759
|
|
|
* |
2760
|
|
|
* @return false|string one single UTF-8 character |
2761
|
|
|
*/ |
2762
|
4 |
|
public static function hex_to_chr(string $hexdec)
{
    // hexdec() skips every non-hexadecimal character (e.g. the "U+" prefix), but feeding it such
    // characters is deprecated since PHP 7.4. Strip them up front: the numeric result is the same
    // and the error-suppression operator is no longer needed.
    $hex_digits = (string) \preg_replace('/[^0-9a-fA-F]/', '', $hexdec);

    // an input without any hex digit evaluates to code point 0, exactly as before
    return self::decimal_to_chr((int) \hexdec($hex_digits));
}
2767
|
|
|
|
2768
|
|
|
/** |
2769
|
|
|
* Converts hexadecimal U+xxxx code point representation to integer. |
2770
|
|
|
* |
2771
|
|
|
* INFO: opposite to UTF8::int_to_hex() |
2772
|
|
|
* |
2773
|
|
|
* EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code> |
2774
|
|
|
* |
2775
|
|
|
* @param string $hexdec <p>The hexadecimal code point representation.</p> |
2776
|
|
|
* |
2777
|
|
|
* @psalm-pure |
2778
|
|
|
* |
2779
|
|
|
* @return false|int |
2780
|
|
|
* <p>The code point, or false on failure.</p> |
2781
|
|
|
*/ |
2782
|
2 |
|
public static function hex_to_int($hexdec)
{
    // init
    $hexdec = (string) $hexdec;

    if ($hexdec === '') {
        return false;
    }

    // Optional "\u" or "U+" prefix followed by 4 to 6 *hexadecimal* digits.
    // The digits are restricted to [0-9a-fA-F]: letters beyond "f" are not valid hex and
    // would make intval(..., 16) return 0 instead of signalling a failure.
    if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
        return \intval($match[1], 16);
    }

    // not a recognised code point notation
    return false;
}
2797
|
|
|
|
2798
|
|
|
/** |
2799
|
|
|
* Converts a UTF-8 string to a series of HTML numbered entities. |
2800
|
|
|
* |
2801
|
|
|
* INFO: opposite to UTF8::html_decode() |
2802
|
|
|
* |
2803
|
|
|
* EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code> |
2804
|
|
|
* |
2805
|
|
|
* @param string $str <p>The Unicode string to be encoded as numbered entities.</p> |
2806
|
|
|
* @param bool $keep_ascii_chars [optional] <p>Keep ASCII chars.</p> |
2807
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
2808
|
|
|
* |
2809
|
|
|
* @psalm-pure |
2810
|
|
|
* |
2811
|
|
|
* @return string HTML numbered entities |
2812
|
|
|
*/ |
2813
|
14 |
|
public static function html_encode(
    string $str,
    bool $keep_ascii_chars = false,
    string $encoding = 'UTF-8'
): string {
    if ($str === '') {
        return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
        // with $keep_ascii_chars only code points from U+0080 upwards are converted
        $start_code = $keep_ascii_chars ? 0x80 : 0x00;

        // convmap = [first code point, last code point, offset, bit mask]
        // - the range ends at U+10FFFF (the last Unicode code point), so plane 16 is encoded too
        // - the mask 0x1fffff keeps all 21 bits of such a code point intact
        // The encoding is always passed explicitly, so the result does not depend on
        // the process-wide mb_internal_encoding().
        /** @var false|string|null $return - needed for PhpStan (stubs error) */
        $return = \mb_encode_numericentity(
            $str,
            [$start_code, 0x10ffff, 0, 0x1fffff],
            $encoding
        );
        if ($return !== null && $return !== false) {
            return $return;
        }
    }

    //
    // fallback via vanilla php
    //

    return \implode(
        '',
        \array_map(
            static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
            },
            self::str_split($str)
        )
    );
}
2870
|
|
|
|
2871
|
|
|
/** |
2872
|
|
|
* UTF-8 version of html_entity_decode() |
2873
|
|
|
* |
2874
|
|
|
* The reason we are not using html_entity_decode() by itself is because |
2875
|
|
|
* while it is not technically correct to leave out the semicolon |
2876
|
|
|
* at the end of an entity most browsers will still interpret the entity |
2877
|
|
|
* correctly. html_entity_decode() does not convert entities without |
2878
|
|
|
* semicolons, so we are left with our own little solution here. Bummer. |
2879
|
|
|
* |
2880
|
|
|
* Convert all HTML entities to their applicable characters. |
2881
|
|
|
* |
2882
|
|
|
* INFO: opposite to UTF8::html_encode() |
2883
|
|
|
* |
2884
|
|
|
* EXAMPLE: <code>UTF8::html_entity_decode('中文空白'); // '中文空白'</code> |
2885
|
|
|
* |
2886
|
|
|
* @see http://php.net/manual/en/function.html-entity-decode.php |
2887
|
|
|
* |
2888
|
|
|
* @param string $str <p> |
2889
|
|
|
* The input string. |
2890
|
|
|
* </p> |
2891
|
|
|
* @param int|null $flags [optional] <p> |
2892
|
|
|
* A bitmask of one or more of the following flags, which specify how to handle quotes |
2893
|
|
|
* and which document type to use. If null is passed, ENT_QUOTES | ENT_HTML5 is used. |
2894
|
|
|
* <table> |
2895
|
|
|
* Available <i>flags</i> constants |
2896
|
|
|
* <tr valign="top"> |
2897
|
|
|
* <td>Constant Name</td> |
2898
|
|
|
* <td>Description</td> |
2899
|
|
|
* </tr> |
2900
|
|
|
* <tr valign="top"> |
2901
|
|
|
* <td><b>ENT_COMPAT</b></td> |
2902
|
|
|
* <td>Will convert double-quotes and leave single-quotes alone.</td> |
2903
|
|
|
* </tr> |
2904
|
|
|
* <tr valign="top"> |
2905
|
|
|
* <td><b>ENT_QUOTES</b></td> |
2906
|
|
|
* <td>Will convert both double and single quotes.</td> |
2907
|
|
|
* </tr> |
2908
|
|
|
* <tr valign="top"> |
2909
|
|
|
* <td><b>ENT_NOQUOTES</b></td> |
2910
|
|
|
* <td>Will leave both double and single quotes unconverted.</td> |
2911
|
|
|
* </tr> |
2912
|
|
|
* <tr valign="top"> |
2913
|
|
|
* <td><b>ENT_HTML401</b></td> |
2914
|
|
|
* <td> |
2915
|
|
|
* Handle code as HTML 4.01. |
2916
|
|
|
* </td> |
2917
|
|
|
* </tr> |
2918
|
|
|
* <tr valign="top"> |
2919
|
|
|
* <td><b>ENT_XML1</b></td> |
2920
|
|
|
* <td> |
2921
|
|
|
* Handle code as XML 1. |
2922
|
|
|
* </td> |
2923
|
|
|
* </tr> |
2924
|
|
|
* <tr valign="top"> |
2925
|
|
|
* <td><b>ENT_XHTML</b></td> |
2926
|
|
|
* <td> |
2927
|
|
|
* Handle code as XHTML. |
2928
|
|
|
* </td> |
2929
|
|
|
* </tr> |
2930
|
|
|
* <tr valign="top"> |
2931
|
|
|
* <td><b>ENT_HTML5</b></td> |
2932
|
|
|
* <td> |
2933
|
|
|
* Handle code as HTML 5. |
2934
|
|
|
* </td> |
2935
|
|
|
* </tr> |
2936
|
|
|
* </table> |
2937
|
|
|
* </p> |
2938
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
2939
|
|
|
* |
2940
|
|
|
* @psalm-pure |
2941
|
|
|
* |
2942
|
|
|
* @return string the decoded string |
2943
|
|
|
*/ |
2944
|
34 |
|
public static function html_entity_decode(
    string $str,
    int $flags = null,
    string $encoding = 'UTF-8'
): string {
    // Strings shorter than 4 bytes (e.g. "&;" or "&x;") and strings without "&" are returned untouched.
    if (!isset($str[3]) || \strpos($str, '&') === false) {
        return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
        $flags = \ENT_QUOTES | \ENT_HTML5;
    }

    // Warn when a different encoding is requested but mbstring is not available.
    $is_common_encoding = \in_array($encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true);
    if (!$is_common_encoding && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // Decode repeatedly until a pass no longer changes the string (handles nested encodings).
    while (\strpos($str, '&') !== false) {
        $before_pass = $str;

        if (\strpos($str, '&#') !== false) {
            // append the missing ";" to numeric (decimal / hex) entities, so that they are decoded as well
            $str = (string) \preg_replace(
                '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                '$1;',
                $str
            );
        }

        $str = \html_entity_decode($str, $flags, $encoding);

        if ($before_pass === $str) {
            // stable: nothing left to decode
            break;
        }
    }

    return $str;
}
3003
|
|
|
|
3004
|
|
|
/** |
3005
|
|
|
* Create a escape html version of the string via "UTF8::htmlspecialchars()". |
3006
|
|
|
* |
3007
|
|
|
* @param string $str |
3008
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
3009
|
|
|
* |
3010
|
|
|
* @psalm-pure |
3011
|
|
|
* |
3012
|
|
|
* @return string |
3013
|
|
|
*/ |
3014
|
6 |
|
public static function html_escape(string $str, string $encoding = 'UTF-8'): string
{
    // convert both quote styles and substitute invalid code unit sequences
    $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $flags, $encoding);
}
3022
|
|
|
|
3023
|
|
|
/** |
3024
|
|
|
* Remove empty html-tag. |
3025
|
|
|
* |
3026
|
|
|
* e.g.: <pre><tag></tag></pre> |
3027
|
|
|
* |
3028
|
|
|
* @param string $str |
3029
|
|
|
* |
3030
|
|
|
* @psalm-pure |
3031
|
|
|
* |
3032
|
|
|
* @return string |
3033
|
|
|
*/ |
3034
|
1 |
|
public static function html_stripe_empty_tags(string $str): string
{
    // an opening tag (no "/" inside), optional whitespace, then a closing tag
    $pattern = '<[^\\/>]*?>\\s*?<\\/[^>]*?>';

    $result = \preg_replace('/' . $pattern . '/u', '', $str);
    if ($result === null) {
        // preg_replace() returns null on failure; with the "u" modifier that happens e.g. for
        // malformed UTF-8 input. The pattern is pure ASCII, so retry byte-wise instead of
        // silently turning the whole input into an empty string.
        $result = \preg_replace('/' . $pattern . '/', '', $str);
    }

    // still failing: hand the input back unchanged rather than losing it
    return $result ?? $str;
}
3042
|
|
|
|
3043
|
|
|
/** |
3044
|
|
|
* Convert all applicable characters to HTML entities: UTF-8 version of htmlentities(). |
3045
|
|
|
* |
3046
|
|
|
* EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code> |
3047
|
|
|
* |
3048
|
|
|
* @see http://php.net/manual/en/function.htmlentities.php |
3049
|
|
|
* |
3050
|
|
|
* @param string $str <p> |
3051
|
|
|
* The input string. |
3052
|
|
|
* </p> |
3053
|
|
|
* @param int $flags [optional] <p> |
3054
|
|
|
* A bitmask of one or more of the following flags, which specify how to handle |
3055
|
|
|
* quotes, invalid code unit sequences and the used document type. The default is |
3056
|
|
|
* ENT_COMPAT | ENT_HTML401. |
3057
|
|
|
* <table> |
3058
|
|
|
* Available <i>flags</i> constants |
3059
|
|
|
* <tr valign="top"> |
3060
|
|
|
* <td>Constant Name</td> |
3061
|
|
|
* <td>Description</td> |
3062
|
|
|
* </tr> |
3063
|
|
|
* <tr valign="top"> |
3064
|
|
|
* <td><b>ENT_COMPAT</b></td> |
3065
|
|
|
* <td>Will convert double-quotes and leave single-quotes alone.</td> |
3066
|
|
|
* </tr> |
3067
|
|
|
* <tr valign="top"> |
3068
|
|
|
* <td><b>ENT_QUOTES</b></td> |
3069
|
|
|
* <td>Will convert both double and single quotes.</td> |
3070
|
|
|
* </tr> |
3071
|
|
|
* <tr valign="top"> |
3072
|
|
|
* <td><b>ENT_NOQUOTES</b></td> |
3073
|
|
|
* <td>Will leave both double and single quotes unconverted.</td> |
3074
|
|
|
* </tr> |
3075
|
|
|
* <tr valign="top"> |
3076
|
|
|
* <td><b>ENT_IGNORE</b></td> |
3077
|
|
|
* <td> |
3078
|
|
|
* Silently discard invalid code unit sequences instead of returning |
3079
|
|
|
* an empty string. Using this flag is discouraged as it |
3080
|
|
|
* may have security implications. |
3081
|
|
|
* </td> |
3082
|
|
|
* </tr> |
3083
|
|
|
* <tr valign="top"> |
3084
|
|
|
* <td><b>ENT_SUBSTITUTE</b></td> |
3085
|
|
|
* <td> |
3086
|
|
|
* Replace invalid code unit sequences with a Unicode Replacement Character |
3087
|
|
|
* U+FFFD (UTF-8) or &#38;#FFFD; (otherwise) instead of returning an empty |
3088
|
|
|
* string. |
3089
|
|
|
* </td> |
3090
|
|
|
* </tr> |
3091
|
|
|
* <tr valign="top"> |
3092
|
|
|
* <td><b>ENT_DISALLOWED</b></td> |
3093
|
|
|
* <td> |
3094
|
|
|
* Replace invalid code points for the given document type with a |
3095
|
|
|
* Unicode Replacement Character U+FFFD (UTF-8) or &#38;#FFFD; |
3096
|
|
|
* (otherwise) instead of leaving them as is. This may be useful, for |
3097
|
|
|
* instance, to ensure the well-formedness of XML documents with |
3098
|
|
|
* embedded external content. |
3099
|
|
|
* </td> |
3100
|
|
|
* </tr> |
3101
|
|
|
* <tr valign="top"> |
3102
|
|
|
* <td><b>ENT_HTML401</b></td> |
3103
|
|
|
* <td> |
3104
|
|
|
* Handle code as HTML 4.01. |
3105
|
|
|
* </td> |
3106
|
|
|
* </tr> |
3107
|
|
|
* <tr valign="top"> |
3108
|
|
|
* <td><b>ENT_XML1</b></td> |
3109
|
|
|
* <td> |
3110
|
|
|
* Handle code as XML 1. |
3111
|
|
|
* </td> |
3112
|
|
|
* </tr> |
3113
|
|
|
* <tr valign="top"> |
3114
|
|
|
* <td><b>ENT_XHTML</b></td> |
3115
|
|
|
* <td> |
3116
|
|
|
* Handle code as XHTML. |
3117
|
|
|
* </td> |
3118
|
|
|
* </tr> |
3119
|
|
|
* <tr valign="top"> |
3120
|
|
|
* <td><b>ENT_HTML5</b></td> |
3121
|
|
|
* <td> |
3122
|
|
|
* Handle code as HTML 5. |
3123
|
|
|
* </td> |
3124
|
|
|
* </tr> |
3125
|
|
|
* </table> |
3126
|
|
|
* </p> |
3127
|
|
|
* @param string $encoding [optional] <p> |
3128
|
|
|
* Like <b>htmlspecialchars</b>, |
3129
|
|
|
* <b>htmlentities</b> takes an optional third argument |
3130
|
|
|
* <i>encoding</i> which defines encoding used in |
3131
|
|
|
* conversion. |
3132
|
|
|
* Although this argument is technically optional, you are highly |
3133
|
|
|
* encouraged to specify the correct value for your code. |
3134
|
|
|
* </p> |
3135
|
|
|
* @param bool $double_encode [optional] <p> |
3136
|
|
|
* When <i>double_encode</i> is turned off PHP will not |
3137
|
|
|
* encode existing html entities. The default is to convert everything. |
3138
|
|
|
* </p> |
3139
|
|
|
* |
3140
|
|
|
* @psalm-pure |
3141
|
|
|
* |
3142
|
|
|
* @return string |
3143
|
|
|
* <p> |
3144
|
|
|
* The encoded string. |
3145
|
|
|
* <br><br> |
3146
|
|
|
* If the input <i>string</i> contains an invalid code unit |
3147
|
|
|
* sequence within the given <i>encoding</i> an empty string |
3148
|
|
|
* will be returned, unless either the <b>ENT_IGNORE</b> or |
3149
|
|
|
* <b>ENT_SUBSTITUTE</b> flags are set. |
3150
|
|
|
* </p> |
3151
|
|
|
*/ |
3152
|
9 |
|
public static function htmlentities(
    string $str,
    int $flags = \ENT_COMPAT,
    string $encoding = 'UTF-8',
    bool $double_encode = true
): string {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = \htmlentities(
        $str,
        $flags,
        $encoding,
        $double_encode
    );

    /**
     * PHP's htmlentities() leaves the backslash alone, because it is mostly used to escape
     * characters for database inserts. That job belongs to the database layer, so every
     * backslash is replaced by its numeric html entity here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $str = \str_replace('\\', '&#92;', $str);

    // finally convert all remaining non-ASCII characters into numeric entities (ASCII is kept as-is)
    return self::html_encode($str, true, $encoding);
}
3181
|
|
|
|
3182
|
|
|
/** |
3183
|
|
|
* Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars() |
3184
|
|
|
* |
3185
|
|
|
* INFO: Take a look at "UTF8::htmlentities()" |
3186
|
|
|
* |
3187
|
|
|
* EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code> |
3188
|
|
|
* |
3189
|
|
|
* @see http://php.net/manual/en/function.htmlspecialchars.php |
3190
|
|
|
* |
3191
|
|
|
* @param string $str <p> |
3192
|
|
|
* The string being converted. |
3193
|
|
|
* </p> |
3194
|
|
|
* @param int $flags [optional] <p> |
3195
|
|
|
* A bitmask of one or more of the following flags, which specify how to handle |
3196
|
|
|
* quotes, invalid code unit sequences and the used document type. The default is |
3197
|
|
|
* ENT_COMPAT | ENT_HTML401. |
3198
|
|
|
* <table> |
3199
|
|
|
* Available <i>flags</i> constants |
3200
|
|
|
* <tr valign="top"> |
3201
|
|
|
* <td>Constant Name</td> |
3202
|
|
|
* <td>Description</td> |
3203
|
|
|
* </tr> |
3204
|
|
|
* <tr valign="top"> |
3205
|
|
|
* <td><b>ENT_COMPAT</b></td> |
3206
|
|
|
* <td>Will convert double-quotes and leave single-quotes alone.</td> |
3207
|
|
|
* </tr> |
3208
|
|
|
* <tr valign="top"> |
3209
|
|
|
* <td><b>ENT_QUOTES</b></td> |
3210
|
|
|
* <td>Will convert both double and single quotes.</td> |
3211
|
|
|
* </tr> |
3212
|
|
|
* <tr valign="top"> |
3213
|
|
|
* <td><b>ENT_NOQUOTES</b></td> |
3214
|
|
|
* <td>Will leave both double and single quotes unconverted.</td> |
3215
|
|
|
* </tr> |
3216
|
|
|
* <tr valign="top"> |
3217
|
|
|
* <td><b>ENT_IGNORE</b></td> |
3218
|
|
|
* <td> |
3219
|
|
|
* Silently discard invalid code unit sequences instead of returning |
3220
|
|
|
* an empty string. Using this flag is discouraged as it |
3221
|
|
|
* may have security implications. |
3222
|
|
|
* </td> |
3223
|
|
|
* </tr> |
3224
|
|
|
* <tr valign="top"> |
3225
|
|
|
* <td><b>ENT_SUBSTITUTE</b></td> |
3226
|
|
|
* <td> |
3227
|
|
|
* Replace invalid code unit sequences with a Unicode Replacement Character |
3228
|
|
|
* U+FFFD (UTF-8) or &#38;#FFFD; (otherwise) instead of returning an empty |
3229
|
|
|
* string. |
3230
|
|
|
* </td> |
3231
|
|
|
* </tr> |
3232
|
|
|
* <tr valign="top"> |
3233
|
|
|
* <td><b>ENT_DISALLOWED</b></td> |
3234
|
|
|
* <td> |
3235
|
|
|
* Replace invalid code points for the given document type with a |
3236
|
|
|
* Unicode Replacement Character U+FFFD (UTF-8) or &#38;#FFFD; |
3237
|
|
|
* (otherwise) instead of leaving them as is. This may be useful, for |
3238
|
|
|
* instance, to ensure the well-formedness of XML documents with |
3239
|
|
|
* embedded external content. |
3240
|
|
|
* </td> |
3241
|
|
|
* </tr> |
3242
|
|
|
* <tr valign="top"> |
3243
|
|
|
* <td><b>ENT_HTML401</b></td> |
3244
|
|
|
* <td> |
3245
|
|
|
* Handle code as HTML 4.01. |
3246
|
|
|
* </td> |
3247
|
|
|
* </tr> |
3248
|
|
|
* <tr valign="top"> |
3249
|
|
|
* <td><b>ENT_XML1</b></td> |
3250
|
|
|
* <td> |
3251
|
|
|
* Handle code as XML 1. |
3252
|
|
|
* </td> |
3253
|
|
|
* </tr> |
3254
|
|
|
* <tr valign="top"> |
3255
|
|
|
* <td><b>ENT_XHTML</b></td> |
3256
|
|
|
* <td> |
3257
|
|
|
* Handle code as XHTML. |
3258
|
|
|
* </td> |
3259
|
|
|
* </tr> |
3260
|
|
|
* <tr valign="top"> |
3261
|
|
|
* <td><b>ENT_HTML5</b></td> |
3262
|
|
|
* <td> |
3263
|
|
|
* Handle code as HTML 5. |
3264
|
|
|
* </td> |
3265
|
|
|
* </tr> |
3266
|
|
|
* </table> |
3267
|
|
|
* </p> |
3268
|
|
|
* @param string $encoding [optional] <p> |
3269
|
|
|
* Defines encoding used in conversion. |
3270
|
|
|
* </p> |
3271
|
|
|
* <p> |
3272
|
|
|
* For the purposes of this function, the encodings |
3273
|
|
|
* ISO-8859-1, ISO-8859-15, |
3274
|
|
|
* UTF-8, cp866, |
3275
|
|
|
* cp1251, cp1252, and |
3276
|
|
|
* KOI8-R are effectively equivalent, provided the |
3277
|
|
|
* <i>string</i> itself is valid for the encoding, as |
3278
|
|
|
* the characters affected by <b>htmlspecialchars</b> occupy |
3279
|
|
|
* the same positions in all of these encodings. |
3280
|
|
|
* </p> |
3281
|
|
|
* @param bool $double_encode [optional] <p> |
3282
|
|
|
* When <i>double_encode</i> is turned off PHP will not |
3283
|
|
|
* encode existing html entities, the default is to convert everything. |
3284
|
|
|
* </p> |
3285
|
|
|
* |
3286
|
|
|
* @psalm-pure |
3287
|
|
|
* |
3288
|
|
|
* @return string <p>the converted string. |
3289
|
|
|
* </p> |
3290
|
|
|
* <p> |
3291
|
|
|
* If the input <i>string</i> contains an invalid code unit |
3292
|
|
|
* sequence within the given <i>encoding</i> an empty string |
3293
|
|
|
* will be returned, unless either the <b>ENT_IGNORE</b> or |
3294
|
|
|
* <b>ENT_SUBSTITUTE</b> flags are set.</p> |
3295
|
|
|
*/ |
3296
|
8 |
|
public static function htmlspecialchars(
    string $str,
    int $flags = \ENT_COMPAT,
    string $encoding = 'UTF-8',
    bool $double_encode = true
): string {
    // "UTF-8" and "CP850" are passed through as-is, everything else is normalized first
    $is_known_encoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$is_known_encoding) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // delegate the conversion to the native implementation
    $escaped = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $escaped;
}
3313
|
|
|
|
3314
|
|
|
/** |
3315
|
|
|
* Checks whether iconv is available on the server. |
3316
|
|
|
* |
3317
|
|
|
* @psalm-pure |
3318
|
|
|
* |
3319
|
|
|
* @return bool |
3320
|
|
|
* <p><strong>true</strong> if available, <strong>false</strong> otherwise</p> |
3321
|
|
|
* |
3322
|
|
|
* @internal <p>Please do not use it anymore, we will make it private in the next major version.</p> |
3323
|
|
|
*/ |
3324
|
|
|
public static function iconv_loaded(): bool
{
    // ask the engine whether the extension is loaded
    $extension_name = 'iconv';

    return \extension_loaded($extension_name);
}
3328
|
|
|
|
3329
|
|
|
/** |
3330
|
|
|
* Converts Integer to hexadecimal U+xxxx code point representation. |
3331
|
|
|
* |
3332
|
|
|
* INFO: opposite to UTF8::hex_to_int() |
3333
|
|
|
* |
3334
|
|
|
* EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code> |
3335
|
|
|
* |
3336
|
|
|
* @param int $int <p>The integer to be converted to hexadecimal code point.</p> |
3337
|
|
|
* @param string $prefix [optional] |
3338
|
|
|
* |
3339
|
|
|
* @psalm-pure |
3340
|
|
|
* |
3341
|
|
|
* @return string the prefixed code point in lower-case hex, left-padded with zeros to at least 4 digits |
3342
|
|
|
*/ |
3343
|
6 |
|
public static function int_to_hex(int $int, string $prefix = 'U+'): string
{
    // dechex() yields lower-case digits without padding; left-pad with zeros up to 4 digits,
    // longer values are kept as they are
    $hex_digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $prefix . $hex_digits;
}
3351
|
|
|
|
3352
|
|
|
/** |
3353
|
|
|
* Checks whether intl-char is available on the server. |
3354
|
|
|
* |
3355
|
|
|
* @psalm-pure |
3356
|
|
|
* |
3357
|
|
|
* @return bool |
3358
|
|
|
* <p><strong>true</strong> if available, <strong>false</strong> otherwise</p> |
3359
|
|
|
* |
3360
|
|
|
* @internal <p>Please do not use it anymore, we will make it private in the next major version.</p> |
3361
|
|
|
*/ |
3362
|
|
|
public static function intlChar_loaded(): bool
{
    // the check is done via the existence of the "IntlChar" class
    $class_name = 'IntlChar';

    return \class_exists($class_name);
}
3366
|
|
|
|
3367
|
|
|
/** |
3368
|
|
|
* Checks whether intl is available on the server. |
3369
|
|
|
* |
3370
|
|
|
* @psalm-pure |
3371
|
|
|
* |
3372
|
|
|
* @return bool |
3373
|
|
|
* <p><strong>true</strong> if available, <strong>false</strong> otherwise</p> |
3374
|
|
|
* |
3375
|
|
|
* @internal <p>Please do not use it anymore, we will make it private in the next major version.</p> |
3376
|
|
|
*/ |
3377
|
5 |
|
public static function intl_loaded(): bool
{
    // ask the engine whether the extension is loaded
    $extension_name = 'intl';

    return \extension_loaded($extension_name);
}
3381
|
|
|
|
3382
|
|
|
/** |
3383
|
|
|
* Returns true if the string contains only alphabetic chars, false otherwise. |
3384
|
|
|
* |
3385
|
|
|
* @param string $str <p>The input string.</p> |
3386
|
|
|
* |
3387
|
|
|
* @psalm-pure |
3388
|
|
|
* |
3389
|
|
|
* @return bool |
3390
|
|
|
* <p>Whether or not $str contains only alphabetic chars.</p> |
3391
|
|
|
*/ |
3392
|
10 |
|
public static function is_alpha(string $str): bool
{
    // the whole string must consist of alphabetic characters (an empty string matches as well)
    $pattern = '^[[:alpha:]]*$';

    // use the mbstring regex engine when available, otherwise the class-internal matcher
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
3400
|
|
|
|
3401
|
|
|
/** |
3402
|
|
|
* Returns true if the string contains only alphabetic and numeric chars, false otherwise. |
3403
|
|
|
* |
3404
|
|
|
* @param string $str <p>The input string.</p> |
3405
|
|
|
* |
3406
|
|
|
* @psalm-pure |
3407
|
|
|
* |
3408
|
|
|
* @return bool |
3409
|
|
|
* <p>Whether or not $str contains only alphanumeric chars.</p> |
3410
|
|
|
*/ |
3411
|
13 |
|
public static function is_alphanumeric(string $str): bool
{
    // the whole string must consist of alphanumeric characters (an empty string matches as well)
    $pattern = '^[[:alnum:]]*$';

    // use the mbstring regex engine when available, otherwise the class-internal matcher
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
3419
|
|
|
|
3420
|
|
|
/** |
3421
|
|
|
* Returns true if the string contains only punctuation chars, false otherwise. |
3422
|
|
|
* |
3423
|
|
|
* @param string $str <p>The input string.</p> |
3424
|
|
|
* |
3425
|
|
|
* @psalm-pure |
3426
|
|
|
* |
3427
|
|
|
* @return bool |
3428
|
|
|
* <p>Whether or not $str contains only punctuation chars.</p> |
3429
|
|
|
*/ |
3430
|
10 |
|
public static function is_punctuation(string $str): bool
{
    // the whole string must consist of punctuation characters (an empty string matches as well)
    $pattern = '^[[:punct:]]*$';

    return self::str_matches_pattern($str, $pattern);
}
3434
|
|
|
|
3435
|
|
|
/** |
3436
|
|
|
* Returns true if the string contains only printable (non-invisible) chars, false otherwise. |
3437
|
|
|
* |
3438
|
|
|
* @param string $str <p>The input string.</p> |
3439
|
|
|
* @param bool $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p> |
3440
|
|
|
* |
3441
|
|
|
* @psalm-pure |
3442
|
|
|
* |
3443
|
|
|
* @return bool |
3444
|
|
|
* <p>Whether or not $str contains only printable (non-invisible) chars.</p> |
3445
|
|
|
*/ |
3446
|
1 |
|
public static function is_printable(string $str, bool $ignore_control_characters = false): bool
{
    // the string counts as printable if removing invisible characters leaves it unchanged
    $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

    return $cleaned === $str;
}
3450
|
|
|
|
3451
|
|
|
/** |
3452
|
|
|
* Checks if a string is 7 bit ASCII. |
3453
|
|
|
* |
3454
|
|
|
* EXAMPLE: <code>UTF8::is_ascii('白'); // false</code> |
3455
|
|
|
* |
3456
|
|
|
* @param string $str <p>The string to check.</p> |
3457
|
|
|
* |
3458
|
|
|
* @psalm-pure |
3459
|
|
|
* |
3460
|
|
|
* @return bool |
3461
|
|
|
* <p> |
3462
|
|
|
* <strong>true</strong> if it is ASCII<br> |
3463
|
|
|
* <strong>false</strong> otherwise |
3464
|
|
|
* </p> |
3465
|
|
|
*/ |
3466
|
8 |
|
public static function is_ascii(string $str): bool
{
    // the check itself is delegated to the ASCII helper class
    $is_ascii = ASCII::is_ascii($str);

    return $is_ascii;
}
3470
|
|
|
|
3471
|
|
|
/**
 * Returns true if the string is base64 encoded, false otherwise.
 *
 * The input must decode in strict mode and re-encode to exactly the same
 * string, so non-canonical or whitespace-padded input is rejected.
 *
 * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
 *
 * @param string|null $str                   <p>The input string.</p>
 * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str is base64 encoded.</p>
 */
public static function is_base64($str, bool $empty_string_is_valid = false): bool
{
    if ($str === '' && !$empty_string_is_valid) {
        return false;
    }

    // null (and any other non-string) is never base64
    if (!\is_string($str)) {
        return false;
    }

    $decoded = \base64_decode($str, true);
    if ($decoded === false) {
        return false;
    }

    // round trip: only canonical base64 survives re-encoding unchanged
    return \base64_encode($decoded) === $str;
}
3502
|
|
|
|
3503
|
|
|
/**
 * Check if the input is binary... (is look like a hack).
 *
 * Checks, in order:
 * 1. an empty input is never binary,
 * 2. a string made only of "0" and "1" is treated as binary,
 * 3. get_file_type() reporting the type "binary",
 * 4. non-strict mode: more than 25% of the bytes are NULL bytes,
 * 5. strict mode: ext-fileinfo reports the MIME encoding "binary".
 *
 * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
 *
 * @param int|string $input
 * @param bool       $strict
 *
 * @psalm-pure
 *
 * @throws \RuntimeException if $strict is true and ext-fileinfo is not available
 *
 * @return bool
 */
public static function is_binary($input, bool $strict = false): bool
{
    $input = (string) $input;
    if ($input === '') {
        return false;
    }

    if (\preg_match('~^[01]+$~', $input)) {
        return true;
    }

    $file_type = self::get_file_type($input);
    if ($file_type['type'] === 'binary') {
        return true;
    }

    if (!$strict) {
        // heuristic: a high share of NULL bytes hints at binary data
        $length = \strlen($input);
        $null_bytes = \substr_count($input, "\x0", 0, $length);

        return ($null_bytes / $length) > 0.25;
    }

    if (self::$SUPPORT['finfo'] === false) {
        throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    /**
     * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
     */
    $finfo_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

    return $finfo_encoding === 'binary';
}
3555
|
|
|
|
3556
|
|
|
/**
 * Check if the file is binary.
 *
 * Only the first 512 bytes of the file are read and handed to is_binary() in
 * strict mode. A file that cannot be opened, cannot be read or is empty is
 * reported as not binary.
 *
 * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
 *
 * @param string $file
 *
 * @return bool
 */
public static function is_binary_file($file): bool
{
    $handle = \fopen($file, 'rb');
    if (!\is_resource($handle)) {
        return false;
    }

    $block = \fread($handle, 512);
    \fclose($handle);

    if ($block === false || $block === '') {
        return false;
    }

    return self::is_binary($block, true);
}
3582
|
|
|
|
3583
|
|
|
/**
 * Returns true if the string contains only whitespace chars, false otherwise.
 *
 * An empty string matches as well, because the pattern allows zero characters.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str contains only whitespace characters.</p>
 */
public static function is_blank(string $str): bool
{
    $pattern = '^[[:space:]]*$';

    // prefer the mbstring regex engine, otherwise use the internal fallback
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
3601
|
|
|
|
3602
|
|
|
/**
 * Checks if the given string is equal to any "Byte Order Mark".
 *
 * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
 *
 * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>true</strong> if $str is exactly a Byte Order Mark, <strong>false</strong> otherwise.</p>
 */
public static function is_bom($str): bool
{
    // The BOM table is keyed by the BOM byte sequence, so an exact match is a
    // plain key lookup; no (by-reference) iteration over the static table is
    // needed. The is_string() guard keeps the "non-string never matches"
    // behaviour and prevents illegal offset types from reaching isset().
    return \is_string($str) && isset(self::$BOM[$str]);
}
3627
|
|
|
|
3628
|
|
|
/**
 * Determine whether the given value is considered to be empty.
 *
 * Follows the rules of PHP's empty(): a value is empty when it converts to
 * FALSE, e.g. '', '0', 0, 0.0 and [].
 *
 * @param array|float|int|string $str
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str is empty().</p>
 */
public static function is_empty($str): bool
{
    // for an always-defined parameter empty($str) is the same as !$str
    return !$str;
}
3645
|
|
|
|
3646
|
|
|
/**
 * Returns true if the string contains only hexadecimal chars, false otherwise.
 *
 * An empty string matches as well, because the pattern allows zero characters.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str contains only hexadecimal chars.</p>
 */
public static function is_hexadecimal(string $str): bool
{
    $pattern = '^[[:xdigit:]]*$';

    // prefer the mbstring regex engine, otherwise use the internal fallback
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
3664
|
|
|
|
3665
|
|
|
/**
 * Check if the string contains any HTML tags.
 *
 * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str contains html elements.</p>
 */
public static function is_html(string $str): bool
{
    if ($str === '') {
        return false;
    }

    // hack for emoji support :/
    $encoded = self::emoji_encode($str);

    // an opening, closing or self-closing tag with optional attributes
    $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

    return \preg_match($tag_pattern, $encoded) === 1;
}
3692
|
|
|
|
3693
|
|
|
/**
 * Check if $url is a correct url.
 *
 * Only "http://" and "https://" URLs are accepted. With $disallow_localhost
 * enabled, URLs pointing at the local machine ("localhost", "127.0.0.1", the
 * IPv6 loopback "::1" with or without brackets, or anything containing
 * ".localhost") are rejected.
 *
 * @param string $url
 * @param bool   $disallow_localhost
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function is_url(string $url, bool $disallow_localhost = false): bool
{
    if ($url === '') {
        return false;
    }

    // WARNING: keep this as hack protection
    if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
        return false;
    }

    // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
    if ($disallow_localhost) {
        if (self::str_istarts_with_any(
            $url,
            [
                'http://localhost',
                'https://localhost',
                'http://127.0.0.1',
                'https://127.0.0.1',
                'http://::1',
                'https://::1',
                // the IPv6 loopback in its valid URL form (IPv6 literals are bracketed in URLs)
                'http://[::1]',
                'https://[::1]',
            ]
        )) {
            return false;
        }

        $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
        if (\preg_match($regex, $url)) {
            return false;
        }
    }

    // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
    $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
    if (\preg_match($regex, $url)) {
        return true;
    }

    return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
}
3744
|
|
|
|
3745
|
|
|
/**
 * Try to check if "$str" is a JSON-string.
 *
 * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
 *
 * @param string $str                                    <p>The input string.</p>
 * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
 *                                                       results.</p>
 *
 * @throws \RuntimeException if ext-json is not available
 *
 * @return bool
 *              <p>Whether or not the $str is in JSON format.</p>
 */
public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
{
    if ($str === '') {
        return false;
    }

    if (self::$SUPPORT['json'] === false) {
        throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // a decoded null is only legitimate when the input was the JSON literal "null"
    if ($decoded === null && \strtoupper($str) !== 'NULL') {
        return false;
    }

    $is_container = \is_object($decoded) || \is_array($decoded);
    if ($only_array_or_object_results_are_valid && !$is_container) {
        return false;
    }

    return \json_last_error() === \JSON_ERROR_NONE;
}
3784
|
|
|
|
3785
|
|
|
/**
 * Returns true if the string contains only lowercase chars, false otherwise.
 *
 * An empty string matches as well, because the pattern allows zero characters.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str contains only lowercase chars.</p>
 */
public static function is_lowercase(string $str): bool
{
    $pattern = '^[[:lower:]]*$';

    // prefer the mbstring regex engine, otherwise use the internal fallback
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
3801
|
|
|
|
3802
|
|
|
/**
 * Returns true if the string is serialized, false otherwise.
 *
 * The string is probed with unserialize(). Object instantiation is disabled
 * for the probe ("allowed_classes" => false), so untrusted input cannot
 * trigger magic methods such as __wakeup() or __destruct(); serialized objects
 * are still recognised, they just decode to __PHP_Incomplete_Class.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str is serialized.</p>
 */
public static function is_serialized(string $str): bool
{
    if ($str === '') {
        return false;
    }

    // "b:0;" is the serialized boolean false, which is indistinguishable from
    // the failure return value of unserialize(), so it is handled up front
    if ($str === 'b:0;') {
        return true;
    }

    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
}
3824
|
|
|
|
3825
|
|
|
/**
 * Returns true if the string contains only upper case chars, false otherwise.
 *
 * An empty string matches as well, because the pattern allows zero characters.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str contains only upper case characters.</p>
 */
public static function is_uppercase(string $str): bool
{
    $pattern = '^[[:upper:]]*$';

    // prefer the mbstring regex engine, otherwise use the internal fallback
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
3844
|
|
|
|
3845
|
|
|
/**
 * Check if the string is UTF-16.
 *
 * Heuristic: the BOM-stripped input is converted from each candidate byte
 * order to UTF-8 and back. If that round trip is stable, every distinct
 * character of the converted text that is found (strictly) in the
 * count_chars() result of the input scores one point for the candidate.
 * The byte order with more points wins; a tie means "not UTF-16".
 *
 * EXAMPLE: <code>
 * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
 * //
 * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
 * //
 * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
 * </code>
 *
 * @param string $str                       <p>The input string.</p>
 * @param bool   $check_if_string_is_binary
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <strong>false</strong> if it's not UTF-16,<br>
 *                   <strong>1</strong> for UTF-16LE,<br>
 *                   <strong>2</strong> for UTF-16BE
 */
public static function is_utf16($str, bool $check_if_string_is_binary = true)
{
    $str = (string) $str;

    // fix for the "binary"-check: a string with a BOM skips it
    if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
        $check_if_string_is_binary = false;
    }

    if ($check_if_string_is_binary && !self::is_binary($str, true)) {
        return false;
    }

    if (self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - is is only a warning
         */
        \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // computed lazily, at most once, as soon as one candidate round-trips
    $str_chars = [];

    // candidates are tried in this order: little endian first, then big endian
    $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];
    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
        $as_utf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
        if (!$as_utf8) {
            continue;
        }

        $back_again = \mb_convert_encoding($as_utf8, $candidate, 'UTF-8');
        $round_trip = \mb_convert_encoding($back_again, 'UTF-8', $candidate);
        if ($round_trip !== $as_utf8) {
            continue;
        }

        if ($str_chars === []) {
            $str_chars = self::count_chars($str, true, false);
        }

        foreach (\array_keys(self::count_chars($round_trip)) as $char) {
            if (\in_array($char, $str_chars, true)) {
                ++$hits[$candidate];
            }
        }
    }

    if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
        return false;
    }

    return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
}
3943
|
|
|
|
3944
|
|
|
/**
 * Check if the string is UTF-32.
 *
 * Heuristic: the BOM-stripped input is converted from each candidate byte
 * order to UTF-8 and back. If that round trip is stable, every distinct
 * character of the converted text that is found (strictly) in the
 * count_chars() result of the input scores one point for the candidate.
 * The byte order with more points wins; a tie means "not UTF-32".
 *
 * EXAMPLE: <code>
 * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
 * //
 * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
 * //
 * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
 * </code>
 *
 * @param string $str                       <p>The input string.</p>
 * @param bool   $check_if_string_is_binary
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <strong>false</strong> if it's not UTF-32,<br>
 *                   <strong>1</strong> for UTF-32LE,<br>
 *                   <strong>2</strong> for UTF-32BE
 */
public static function is_utf32($str, bool $check_if_string_is_binary = true)
{
    $str = (string) $str;

    // fix for the "binary"-check: a string with a BOM skips it
    if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
        $check_if_string_is_binary = false;
    }

    if ($check_if_string_is_binary && !self::is_binary($str, true)) {
        return false;
    }

    if (self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - is is only a warning
         */
        \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // computed lazily, at most once, as soon as one candidate round-trips
    $str_chars = [];

    // candidates are tried in this order: little endian first, then big endian
    $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];
    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
        $as_utf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
        if (!$as_utf8) {
            continue;
        }

        $back_again = \mb_convert_encoding($as_utf8, $candidate, 'UTF-8');
        $round_trip = \mb_convert_encoding($back_again, 'UTF-8', $candidate);
        if ($round_trip !== $as_utf8) {
            continue;
        }

        if ($str_chars === []) {
            $str_chars = self::count_chars($str, true, false);
        }

        foreach (\array_keys(self::count_chars($round_trip)) as $char) {
            if (\in_array($char, $str_chars, true)) {
                ++$hits[$candidate];
            }
        }
    }

    if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
        return false;
    }

    return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
}
4042
|
|
|
|
4043
|
|
|
/**
 * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
 *
 * An array is valid when every element is valid (checked recursively, stopping
 * at the first invalid one); any other input is cast to string and checked
 * with is_utf8_string().
 *
 * EXAMPLE: <code>
 * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
 * //
 * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
 * </code>
 *
 * @param int|string|string[]|null $str    <p>The input to be checked.</p>
 * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function is_utf8($str, bool $strict = false): bool
{
    if (!\is_array($str)) {
        return self::is_utf8_string((string) $str, $strict);
    }

    foreach ($str as $item) {
        if (!self::is_utf8($item, $strict)) {
            return false;
        }
    }

    return true;
}
4073
|
|
|
|
4074
|
|
|
/**
 * Decodes a JSON string.
 *
 * The input is passed through filter() first; a $depth below 1 is raised to 1.
 *
 * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
 *
 * @see http://php.net/manual/en/function.json-decode.php
 *
 * @param string $json    <p>
 *                        The <i>json</i> string being decoded.
 *                        </p>
 *                        <p>
 *                        This function only works with UTF-8 encoded strings.
 *                        </p>
 *                        <p>PHP implements a superset of
 *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
 *                        only supports these values when they are nested inside an array or an object.
 *                        </p>
 * @param bool   $assoc   [optional] <p>
 *                        When <b>TRUE</b>, returned objects will be converted into
 *                        associative arrays.
 *                        </p>
 * @param int    $depth   [optional] <p>
 *                        User specified recursion depth.
 *                        </p>
 * @param int    $options [optional] <p>
 *                        Bitmask of JSON decode options, e.g. <b>JSON_BIGINT_AS_STRING</b>
 *                        (default is to cast large integers as floats).
 *                        </p>
 *
 * @psalm-pure
 *
 * @throws \RuntimeException if ext-json is not available
 *
 * @return mixed
 *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
 *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
 *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
 *               is deeper than the recursion limit.</p>
 */
public static function json_decode(
    string $json,
    bool $assoc = false,
    int $depth = 512,
    int $options = 0
) {
    $json = self::filter($json);

    if (self::$SUPPORT['json'] === false) {
        throw new \RuntimeException('ext-json: is not installed');
    }

    // the native function requires a depth of at least 1
    $safe_depth = \max(1, $depth);

    return \json_decode($json, $assoc, $safe_depth, $options);
}
4131
|
|
|
|
4132
|
|
|
/**
 * Returns the JSON representation of a value.
 *
 * The value is passed through filter() first; a $depth below 1 is raised to 1.
 *
 * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
 *
 * @see http://php.net/manual/en/function.json-encode.php
 *
 * @param mixed $value   <p>
 *                       The <i>value</i> being encoded. Can be any type except
 *                       a resource.
 *                       </p>
 *                       <p>
 *                       All string data must be UTF-8 encoded.
 *                       </p>
 *                       <p>PHP implements a superset of
 *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
 *                       only supports these values when they are nested inside an array or an object.
 *                       </p>
 * @param int   $options [optional] <p>
 *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
 *                       <b>JSON_HEX_TAG</b>,
 *                       <b>JSON_HEX_AMP</b>,
 *                       <b>JSON_HEX_APOS</b>,
 *                       <b>JSON_NUMERIC_CHECK</b>,
 *                       <b>JSON_PRETTY_PRINT</b>,
 *                       <b>JSON_UNESCAPED_SLASHES</b>,
 *                       <b>JSON_FORCE_OBJECT</b>,
 *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
 *                       constants is described on
 *                       the JSON constants page.
 *                       </p>
 * @param int   $depth   [optional] <p>
 *                       Set the maximum depth. Must be greater than zero.
 *                       </p>
 *
 * @psalm-pure
 *
 * @throws \RuntimeException if ext-json is not available
 *
 * @return false|string
 *                      A JSON encoded <strong>string</strong> on success or<br>
 *                      <strong>FALSE</strong> on failure
 */
public static function json_encode($value, int $options = 0, int $depth = 512)
{
    $value = self::filter($value);

    if (self::$SUPPORT['json'] === false) {
        throw new \RuntimeException('ext-json: is not installed');
    }

    // the native function requires a depth of at least 1
    $safe_depth = \max(1, $depth);

    return \json_encode($value, $options, $safe_depth);
}
4188
|
|
|
|
4189
|
|
|
/**
 * Checks whether JSON is available on the server.
 *
 * The check looks for the native json_decode() function.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
 *
 * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
 */
public static function json_loaded(): bool
{
    $decoder_available = \function_exists('json_decode');

    return $decoder_available;
}
4203
|
|
|
|
4204
|
|
|
/**
 * Makes string's first char lowercase.
 *
 * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
 *
 * @param string      $str                           <p>The input string</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string the resulting string
 */
public static function lcfirst(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    ?string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // the plain mb_* functions know nothing about language specific rules or
    // length preservation, so they are only used when neither is requested
    $use_mb_functions = ($lang === null && !$try_to_keep_the_string_length);

    if ($encoding === 'UTF-8') {
        $str_part_two = (string) \mb_substr($str, 1);

        if ($use_mb_functions) {
            $str_part_one = \mb_strtolower(
                (string) \mb_substr($str, 0, 1)
            );
        } else {
            $str_part_one = self::strtolower(
                (string) \mb_substr($str, 0, 1),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $str_part_two = (string) self::substr($str, 1, null, $encoding);

        $str_part_one = self::strtolower(
            (string) self::substr($str, 0, 1, $encoding),
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );
    }

    return $str_part_one . $str_part_two;
}
4266
|
|
|
|
4267
|
|
|
/**
 * Lowercase the first character of all words in the string.
 *
 * The string is split with str_to_words(); every non-empty part that is not
 * listed in $exceptions is passed through lcfirst() and the parts are
 * concatenated again. Parts like "0" are kept (only truly empty parts are
 * skipped).
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
 * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
 *                                                   not start a new word.</p>
 * @param string      $encoding                      [optional] <p>Set the charset.</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function lcwords(
    string $str,
    array $exceptions = [],
    string $char_list = '',
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    ?string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    // compare against '' explicitly: a loose check would also swallow the string "0"
    if ($str === '') {
        return '';
    }

    $words = self::str_to_words($str, $char_list);
    $use_exceptions = $exceptions !== [];

    $words_str = '';
    foreach ($words as $word) {
        // skip only truly empty parts; a word like "0" must survive
        if ((string) $word === '') {
            continue;
        }

        if (
            !$use_exceptions
            ||
            !\in_array($word, $exceptions, true)
        ) {
            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        } else {
            $words_str .= $word;
        }
    }

    return $words_str;
}
4320
|
|
|
|
4321
|
|
|
/**
 * Strip whitespace or other characters from the beginning of a UTF-8 string.
 *
 * EXAMPLE: <code>UTF8::ltrim(' 中文空白 '); // '中文空白 '</code>
 *
 * @param string      $str   <p>The string to be trimmed</p>
 * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace,
 *                           an empty string strips nothing</p>
 *
 * @psalm-pure
 *
 * @return string the string with unwanted characters stripped from the left
 */
public static function ltrim(string $str = '', ?string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    // An empty char list would build the invalid character class "[]".
    // Like PHP's native ltrim(), there is simply nothing to strip then.
    if ($chars === '') {
        return $str;
    }

    if (self::$SUPPORT['mbstring'] === true) {
        if ($chars !== null) {
            /** @noinspection PregQuoteUsageInspection */
            $chars = \preg_quote($chars);
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    if ($chars !== null) {
        $chars = \preg_quote($chars, '/');
        $pattern = "^[{$chars}]+";
    } else {
        $pattern = '^[\\s]+';
    }

    return self::regex_replace($str, $pattern, '');
}
4360
|
|
|
|
4361
|
|
|
/**
 * Returns the UTF-8 character with the maximum code point in the given data.
 *
 * An array argument is concatenated into one string before the code points
 * are compared.
 *
 * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
 *
 * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
 *
 * @psalm-pure
 *
 * @return string|null the character with the highest code point than others, returns null on failure or empty input
 */
public static function max($arg)
{
    $str = \is_array($arg) ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($str);
    if ($codepoints === []) {
        return null;
    }

    return self::chr((int) \max($codepoints));
}
4387
|
|
|
|
4388
|
|
|
/**
 * Calculates and returns the maximum number of bytes taken by any
 * UTF-8 encoded character in the given string.
 *
 * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
 *
 * @param string $str <p>The original Unicode string.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <p>Largest entry of the per-character size list, 0 if that list is empty.</p>
 */
public static function max_chr_width(string $str): int
{
    $size_list = self::chr_size_list($str);

    // \max() must not be called with an empty array.
    return $size_list === [] ? 0 : (int) \max($size_list);
}
4410
|
|
|
|
4411
|
|
|
/**
 * Checks whether the "mbstring" extension is loaded.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
 *
 * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
 */
public static function mbstring_loaded(): bool
{
    $is_loaded = \extension_loaded('mbstring');

    return $is_loaded;
}
4425
|
|
|
|
4426
|
|
|
/**
 * Returns the UTF-8 character with the minimum code point in the given data.
 *
 * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
 *
 * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
 *
 * @psalm-pure
 *
 * @return string|null
 *                     <p>The character with the lowest code point, null on failure or empty input.</p>
 */
public static function min($arg)
{
    // An array of strings is treated as one concatenated string.
    $input = \is_array($arg) ? \implode('', $arg) : $arg;

    $code_point_list = self::codepoints($input);

    return $code_point_list === []
        ? null
        : self::chr((int) \min($code_point_list));
}
4453
|
|
|
|
4454
|
|
|
/**
 * Normalize the encoding-"name" input.
 *
 * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
 *
 * Well-known names are answered directly, names contained in the encodings
 * data list are returned unchanged, everything else is upper-cased and looked
 * up in an alias table (ignoring all non-alphanumeric characters).
 *
 * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
 * @param mixed $fallback <p>e.g.: UTF-8</p>
 *
 * @psalm-pure
 *
 * @return mixed|string
 *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)</p>
 *
 * @template TNormalizeEncodingFallback
 * @phpstan-param string|TNormalizeEncodingFallback $fallback
 * @phpstan-return string|TNormalizeEncodingFallback
 */
public static function normalize_encoding($encoding, $fallback = '')
{
    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<string,string>
     */
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string) $encoding;

    // Empty input, and "0" / "1" (stringified booleans from non "strict_types"
    // callers), carry no encoding name at all.
    if (\in_array($encoding, ['', '0', '1'], true)) {
        return $fallback;
    }

    // Fast paths for the most common names.
    if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
        return 'UTF-8';
    }

    if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
        return 'CP850';
    }

    if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
        return 'HTML-ENTITIES';
    }

    if (\in_array($encoding, ['ISO', 'ISO-8859-1'], true)) {
        return 'ISO-8859-1';
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
        return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // The encodings data list is loaded lazily, on first use.
    if (self::$ENCODINGS === null) {
        self::$ENCODINGS = self::getData('encodings');
    }

    if (\in_array($encoding, self::$ENCODINGS, true)) {
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

        return $encoding;
    }

    $normalized = \strtoupper($encoding);

    // Alias lookup key: upper-cased name reduced to its letters and digits.
    $lookup_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $normalized);

    $aliases = [
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // Unknown names are returned upper-cased, known aliases as canonical name.
    $normalized = $aliases[$lookup_key] ?? $normalized;

    // Cached under the name as it was passed in (before upper-casing).
    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
}
4619
|
|
|
|
4620
|
|
|
/**
 * Standardize line ending to unix-like.
 *
 * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
 *
 * @param string          $str      <p>The input string.</p>
 * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
 *                                  here. An array is passed to "str_replace()" as-is and is matched by position
 *                                  against ["\r\n", "\r", "\n"].</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with normalized line ending.</p>
 */
public static function normalize_line_ending(string $str, $replacer = "\n"): string
{
    if (\is_array($replacer)) {
        // Legacy form, kept untouched: positional, sequential replacement.
        return \str_replace(["\r\n", "\r", "\n"], $replacer, $str);
    }

    $replacer = (string) $replacer;

    // strtr() replaces in a single pass (longest key first) and never re-scans
    // text it has just inserted. A sequential str_replace() would expand the
    // "\r" and "\n" of a "\r\n" replacer again and corrupt the result.
    return \strtr(
        $str,
        [
            "\r\n" => $replacer,
            "\r"   => $replacer,
            "\n"   => $replacer,
        ]
    );
}
4636
|
|
|
|
4637
|
|
|
/**
 * Normalize some MS Word special characters.
 *
 * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
 *
 * Thin wrapper: the work is delegated to ASCII::normalize_msword().
 *
 * @param string $str <p>The string to be normalized.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
 */
public static function normalize_msword(string $str): string
{
    $normalized = ASCII::normalize_msword($str);

    return $normalized;
}
4653
|
|
|
|
4654
|
|
|
/**
 * Normalize the whitespace.
 *
 * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
 *
 * Thin wrapper: all arguments are forwarded unchanged to ASCII::normalize_whitespace().
 *
 * @param string $str                          <p>The string to be normalized.</p>
 * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
 * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
 *                                             bidirectional text chars.</p>
 * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with normalized whitespace.</p>
 */
public static function normalize_whitespace(
    string $str,
    bool $keep_non_breaking_space = false,
    bool $keep_bidi_unicode_controls = false,
    bool $normalize_control_characters = false
): string {
    $normalized = ASCII::normalize_whitespace(
        $str,
        $keep_non_breaking_space,
        $keep_bidi_unicode_controls,
        $normalize_control_characters
    );

    return $normalized;
}
4683
|
|
|
|
4684
|
|
|
/**
 * Calculates Unicode code point of the given UTF-8 encoded character.
 *
 * INFO: opposite to UTF8::chr()
 *
 * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
 *
 * Lookup order: per-call-site result cache, the pre-built "ord" data table,
 * "IntlChar::ord()" (if supported) and finally a manual decoding of the
 * first (up to four) bytes.
 *
 * @param string $chr      <p>The character of which to calculate code point.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <p>Unicode code point of the given character,<br>
 *             0 on invalid UTF-8 byte sequence</p>
 */
public static function ord($chr, string $encoding = 'UTF-8'): int
{
    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<string,int>
     */
    static $CHAR_CACHE = [];

    // init
    $chr = (string) $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The cache key is built from the character as it was passed in,
    // i.e. before any re-encoding below.
    $cache_key = $chr . '_' . $encoding;
    if (isset($CHAR_CACHE[$cache_key])) {
        return $CHAR_CACHE[$cache_key];
    }

    // check again, if it's still not UTF-8
    if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
    }

    // The lookup table is loaded lazily, on first use.
    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (isset(self::$ORD[$chr])) {
        $CHAR_CACHE[$cache_key] = self::$ORD[$chr];

        return $CHAR_CACHE[$cache_key];
    }

    //
    // fallback via "IntlChar"
    //

    if (self::$SUPPORT['intlChar'] === true) {
        $intl_code = \IntlChar::ord($chr);
        // A falsy result is not cached here, the byte based fallback decides.
        if ($intl_code) {
            $CHAR_CACHE[$cache_key] = $intl_code;

            return $intl_code;
        }
    }

    //
    // fallback via vanilla php
    //

    /** @var int[] $bytes - "unpack": only false if the format string contains errors; 1-indexed */
    $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
    $lead_byte = $bytes ? $bytes[1] : 0;

    if ($lead_byte >= 0xF0 && isset($bytes[4])) {
        // 4-byte sequence
        $code_point = (($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
        // 3-byte sequence
        $code_point = (($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
        // 2-byte sequence
        $code_point = (($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
        // single byte (or nothing decodable): the first byte value itself
        $code_point = $lead_byte;
    }

    $CHAR_CACHE[$cache_key] = $code_point;

    return $code_point;
}
4769
|
|
|
|
4770
|
|
|
/**
 * Parses the string into an array (into the second parameter).
 *
 * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
 *          if the second parameter is not set!
 *
 * EXAMPLE: <code>
 * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
 * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
 * </code>
 *
 * @see http://php.net/manual/en/function.parse-str.php
 *
 * @param string $str        <p>The input string.</p>
 * @param array  $result     <p>The result will be returned into this reference parameter.</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
 */
public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
{
    if ($clean_utf8) {
        $str = self::clean($str);
    }

    if (self::$SUPPORT['mbstring'] !== true) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
         */
        \parse_str($str, $result);

        return $result !== [];
    }

    $is_parsed = \mb_parse_str($str, $result);

    // An empty result array counts as a failure, too.
    return $is_parsed !== false && $result !== [];
}
4811
|
|
|
|
4812
|
|
|
/**
 * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
 *
 * The check runs an empty pattern with the "u" modifier against an empty
 * string; errors raised by that call are silenced.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>
 *              <strong>true</strong> if support is available,<br>
 *              <strong>false</strong> otherwise
 *              </p>
 */
public static function pcre_utf8_support(): bool
{
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe_result = @\preg_match('//u', '');

    return (bool) $probe_result;
}
4828
|
|
|
|
4829
|
|
|
/**
 * Create an array containing a range of UTF-8 characters.
 *
 * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
 *
 * How the two boundaries are interpreted:
 * - with $use_ctype: both decimal digit strings => decimal code points,
 *   otherwise both hexadecimal digit strings => hexadecimal code points,
 *   otherwise each one is treated as a character;
 * - without $use_ctype: a numeric boundary is a code point, anything else a character.
 *
 * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
 * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
 * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
 *                              "is_numeric"</p>
 * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param float|int  $step      [optional] <p>
 *                              If a step value is given, it will be used as the
 *                              increment between elements in the sequence. step
 *                              should be given as a positive number. If not specified,
 *                              step will default to 1.
 *                              </p>
 *
 * @psalm-pure
 *
 * @throws \InvalidArgumentException if $step is not a positive number
 * @throws \RuntimeException         if $use_ctype is requested but "ext-ctype" is not available
 *
 * @return string[]
 *                  <p>The characters of the range, an empty array if a boundary is empty or resolves to 0.</p>
 */
public static function range(
    $var1,
    $var2,
    bool $use_ctype = true,
    string $encoding = 'UTF-8',
    $step = 1
): array {
    if (!$var1 || !$var2) {
        return [];
    }

    if ($step !== 1) {
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_numeric($step)) {
            throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
         */
        if ($step <= 0) {
            throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
        }
    }

    if ($use_ctype && self::$SUPPORT['ctype'] === false) {
        throw new \RuntimeException('ext-ctype: is not installed');
    }

    $is_digit = false;
    $is_xdigit = false;

    // The ctype functions are always fed with strings: an integer argument
    // would be interpreted as an ASCII code (and is deprecated since PHP 8.1),
    // which made the digit and the hex-digit detection disagree.
    if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
        $is_digit = true;
        $start = (int) $var1;
    } elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
        $is_xdigit = true;
        $start = (int) self::hex_to_int((string) $var1);
    } elseif (!$use_ctype && \is_numeric($var1)) {
        $start = (int) $var1;
    } else {
        $start = self::ord((string) $var1);
    }

    if (!$start) {
        return [];
    }

    // The end boundary follows the interpretation chosen for the start boundary.
    if ($is_digit) {
        $end = (int) $var2;
    } elseif ($is_xdigit) {
        $end = (int) self::hex_to_int((string) $var2);
    } elseif (!$use_ctype && \is_numeric($var2)) {
        $end = (int) $var2;
    } else {
        $end = self::ord((string) $var2);
    }

    if (!$end) {
        return [];
    }

    $array = [];
    foreach (\range($start, $end, $step) as $i) {
        $array[] = (string) self::chr((int) $i, $encoding);
    }

    return $array;
}
4922
|
|
|
|
4923
|
|
|
/**
 * Multi decode HTML entity + fix urlencoded-win1252-chars.
 *
 * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
 *
 * e.g:
 * 'test+test'                     => 'test+test'
 * 'D&#252;sseldorf'               => 'Düsseldorf'
 * 'D%FCsseldorf'                  => 'Düsseldorf'
 * 'D&#xFC;sseldorf'               => 'Düsseldorf'
 * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
 * 'DÃ¼sseldorf'                   => 'Düsseldorf'
 * 'D%C3%BCsseldorf'               => 'Düsseldorf'
 * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
 * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
 *
 * @param string $str          <p>The input string.</p>
 * @param bool   $multi_decode <p>Decode as often as possible.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The decoded URL, as a string.</p>
 */
public static function rawurldecode(string $str, bool $multi_decode = true): string
{
    if ($str === '') {
        return '';
    }

    $str = self::urldecode_unicode_helper($str);

    // One decoding pass is always done; with $multi_decode the passes are
    // repeated until a pass no longer changes the string.
    do {
        $before_pass = $str;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $str = \rawurldecode(
            self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            )
        );
    } while ($multi_decode && $before_pass !== $str);

    return self::fix_simple_utf8($str);
}
4983
|
|
|
|
4984
|
|
|
/**
 * Replaces all occurrences of $pattern in $str by $replacement.
 *
 * The pattern is wrapped into the delimiter and always gets the "u"
 * (UTF-8) modifier, followed by the given options.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
 * @param string $replacement <p>The string to replace with.</p>
 * @param string $options     [optional] <p>Matching conditions to be used. "msr" is mapped to "ms".</p>
 * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function regex_replace(
    string $str,
    string $pattern,
    string $replacement,
    string $options = '',
    string $delimiter = '/'
): string {
    $modifiers = $options === 'msr' ? 'ms' : $options;

    // fallback
    $boundary = $delimiter ?: '/';

    $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

    return (string) \preg_replace($regex, $replacement, $str);
}
5019
|
|
|
|
5020
|
|
|
/**
 * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
 *
 * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
 *
 * Every entry of the BOM table is checked once, in table order, against the
 * start of the (possibly already shortened) string.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string without UTF-BOM.</p>
 */
public static function remove_bom(string $str): string
{
    if ($str === '') {
        return '';
    }

    $remaining_bytes = \strlen($str);

    foreach (self::$BOM as $bom => $bom_size) {
        if (\strncmp($str, $bom, $bom_size) !== 0) {
            continue;
        }

        /** @var false|string $without_bom - needed for PhpStan (stubs error) */
        $without_bom = \substr($str, $bom_size, $remaining_bytes);
        if ($without_bom === false) {
            // nothing is left behind the BOM
            return '';
        }

        $remaining_bytes -= $bom_size;
        $str = (string) $without_bom;
    }

    return $str;
}
5055
|
|
|
|
5056
|
|
|
/**
 * Removes duplicate occurrences of a string in another string.
 *
 * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
 *
 * Only directly consecutive repetitions are collapsed into one occurrence.
 *
 * @param string          $str  <p>The base string.</p>
 * @param string|string[] $what <p>String to search for in the base string. Empty strings are ignored.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with removed duplicates.</p>
 */
public static function remove_duplicates(string $str, $what = ' '): string
{
    if (\is_string($what)) {
        $what = [$what];
    }

    /**
     * @psalm-suppress RedundantConditionGivenDocblockType
     */
    if (\is_array($what)) {
        foreach ($what as $item) {
            $item = (string) $item;
            if ($item === '') {
                // there is no such thing as a duplicated empty string
                continue;
            }

            // The replacement is the captured group, not $item itself: $item is
            // user data and sequences like "$0" or "\1" inside it would be
            // expanded as back-references by preg_replace().
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }
    }

    return $str;
}
5086
|
|
|
|
5087
|
|
|
/**
 * Remove html via "strip_tags()" from the string.
 *
 * @param string $str            <p>The input string.</p>
 * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
 *                               should not be stripped. Default: '' (strip all tags)
 *                               </p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string without html tags.</p>
 */
public static function remove_html(string $str, string $allowable_tags = ''): string
{
    $stripped = \strip_tags($str, $allowable_tags);

    return $stripped;
}
5104
|
|
|
|
5105
|
|
|
/**
 * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
 *
 * EXAMPLE: <code>UTF8::remove_html_breaks("a<br />b\r\nc", ' '); // 'a b c'</code>
 *
 * A "\r\n" pair counts as one break. "<br>" tags are matched case-insensitively,
 * with or without attributes / a closing slash; other tags that merely start
 * with "<br" are left alone.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $replacement [optional] <p>Default is a empty string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string without breaks.</p>
 */
public static function remove_html_breaks(string $str, string $replacement = ''): string
{
    // "\r\n" must be the first alternative so that it is consumed as a whole;
    // "[^>]*" stops at the first ">" and may span line breaks inside the tag.
    return (string) \preg_replace("#\r\n|\r|\n|<br\\b[^>]*>#i", $replacement, $str);
}
5120
|
|
|
|
5121
|
|
|
/**
 * Remove invisible characters from a string.
 *
 * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
 *
 * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
 *
 * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
 *
 * Thin wrapper: all arguments are forwarded unchanged to ASCII::remove_invisible_characters().
 *
 * @param string $str                           <p>The input string.</p>
 * @param bool   $url_encoded                   [optional] <p>
 *                                              Try to remove url encoded control character.
 *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
 *                                              <br>
 *                                              Default: false
 *                                              </p>
 * @param string $replacement                   [optional] <p>The replacement character.</p>
 * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string without invisible chars.</p>
 */
public static function remove_invisible_characters(
    string $str,
    bool $url_encoded = false,
    string $replacement = '',
    bool $keep_basic_control_characters = true
): string {
    $cleaned = ASCII::remove_invisible_characters(
        $str,
        $url_encoded,
        $replacement,
        $keep_basic_control_characters
    );

    return $cleaned;
}
5158
|
|
|
|
5159
|
|
|
/**
 * Returns a new string with the prefix $substring removed, if present.
 *
 * EXAMPLE: <code>UTF8::remove_left('κόσμε-foo', 'κόσμε'); // '-foo'</code>
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The prefix to remove.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string without the prefix $substring.</p>
 */
public static function remove_left(
    string $str,
    string $substring,
    string $encoding = 'UTF-8'
): string {
    // The prefix is compared with '' explicitly: a truthiness check would
    // treat the prefix "0" as "no prefix given". strncmp() only looks at the
    // first bytes instead of searching the whole string.
    if (
        $substring === ''
        ||
        \strncmp($str, $substring, \strlen($substring)) !== 0
    ) {
        return $str;
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            (int) \mb_strlen($substring)
        );
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    return (string) self::substr(
        $str,
        (int) self::strlen($substring, $encoding),
        null,
        $encoding
    );
}
5200
|
|
|
|
5201
|
|
|
/**
 * Returns a new string with the suffix $substring removed, if present.
 *
 * EXAMPLE: <code>UTF8::remove_right('foo-κόσμε', 'κόσμε'); // 'foo-'</code>
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The suffix to remove.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string having a $str without the suffix $substring.</p>
 */
public static function remove_right(
    string $str,
    string $substring,
    string $encoding = 'UTF-8'
): string {
    // The suffix is compared with '' explicitly: a truthiness check would
    // treat the suffix "0" as "no suffix given".
    if (
        $substring === ''
        ||
        \substr($str, -\strlen($substring)) !== $substring
    ) {
        return $str;
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            0,
            (int) \mb_strlen($str) - (int) \mb_strlen($substring)
        );
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    return (string) self::substr(
        $str,
        0,
        (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
        $encoding
    );
}
5239
|
|
|
|
5240
|
|
|
/**
 * Replaces every occurrence of $search in $str with $replacement.
 *
 * @param string $str            <p>The input string.</p>
 * @param string $search         <p>The needle to search for.</p>
 * @param string $replacement    <p>The string to replace with.</p>
 * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with replaced parts.</p>
 */
public static function replace(
    string $str,
    string $search,
    string $replacement,
    bool $case_sensitive = true
): string {
    // Case-insensitive replacement goes through this class's own str_ireplace().
    return $case_sensitive
        ? \str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
}
5265
|
|
|
|
5266
|
|
|
/**
 * Replaces every occurrence of each element of $search in $str with $replacement.
 *
 * @param string       $str            <p>The input string.</p>
 * @param array        $search         <p>The elements to search for.</p>
 * @param array|string $replacement    <p>The string (or strings) to replace with.</p>
 * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with replaced parts.</p>
 */
public static function replace_all(
    string $str,
    array $search,
    $replacement,
    bool $case_sensitive = true
): string {
    // Case-insensitive replacement goes through this class's own str_ireplace().
    return $case_sensitive
        ? \str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
}
5291
|
|
|
|
5292
|
|
|
/**
 * Replace the diamond question mark (�) and, optionally, invalid UTF-8 sequences with the replacement.
 *
 * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
 *
 * @param string $str                        <p>The input string.</p>
 * @param string $replacement_char           <p>The replacement character.</p>
 * @param bool   $process_invalid_utf8_chars <p>Also convert invalid UTF-8 chars.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string without diamond question marks (�).</p>
 */
public static function replace_diamond_question_mark(
    string $str,
    string $replacement_char = '',
    bool $process_invalid_utf8_chars = true
): string {
    if ($str === '') {
        return '';
    }

    if ($process_invalid_utf8_chars) {
        // "none" makes mbstring drop invalid sequences; otherwise pass the byte value of the replacement.
        $substitute = $replacement_char === '' ? 'none' : \ord($replacement_char);

        if (self::$SUPPORT['mbstring'] === false) {
            // if there is no native support for "mbstring",
            // then we need to clean the string before ...
            $str = self::clean($str);
        }

        /**
         * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
         */
        $previous_substitute = \mb_substitute_character();
        /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
        @\mb_substitute_character($substitute);
        // the polyfill maybe return false, so cast to string
        $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
        // restore the previously configured substitute character
        \mb_substitute_character($previous_substitute);
    }

    // A string replacement is applied to every element of the search array.
    return \str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        $replacement_char,
        $str
    );
}
5351
|
|
|
|
5352
|
|
|
/**
 * Strip whitespace or other characters from the end of a UTF-8 string.
 *
 * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
 *
 * @param string      $str   <p>The string to be trimmed.</p>
 * @param string|null $chars <p>Optional characters to be stripped. An empty string strips nothing.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with unwanted characters stripped from the right.</p>
 */
public static function rtrim(string $str = '', string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    // An empty character list would produce the invalid class "[]+$"; the failed
    // regex call would then be cast to '' and the whole input would be lost.
    if ($chars === '') {
        return $str;
    }

    if (self::$SUPPORT['mbstring'] === true) {
        if ($chars !== null) {
            /** @noinspection PregQuoteUsageInspection */
            $chars = \preg_quote($chars);
            // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    if ($chars !== null) {
        $chars = \preg_quote($chars, '/');
        $pattern = "[{$chars}]+$";
    } else {
        $pattern = '[\\s]+$';
    }

    return self::regex_replace($str, $pattern, '');
}
5392
|
|
|
|
5393
|
|
|
/**
 * WARNING: Prints the native UTF-8 support information (libs) by default, e.g. for debugging.
 *
 * Every entry of the internal support table is rendered as "key - value"
 * inside a <pre> block.
 *
 * @param bool $useEcho <p>Whether to echo the generated HTML in addition to returning it.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The generated HTML.</p>
 */
public static function showSupport(bool $useEcho = true)
{
    $html = '<pre>';

    // read-only iteration, no reference needed
    foreach (self::$SUPPORT as $key => $value) {
        $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
    }

    $html .= '</pre>';

    if ($useEcho) {
        echo $html;
    }

    return $html;
}
5419
|
|
|
|
5420
|
|
|
/**
 * Converts a UTF-8 character to an HTML numbered entity like "&#123;".
 *
 * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
 *
 * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
 * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
 * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The HTML numbered entity for the given character.</p>
 */
public static function single_chr_html_encode(
    string $char,
    bool $keep_ascii_chars = false,
    string $encoding = 'UTF-8'
): string {
    if ($char === '') {
        return '';
    }

    // ASCII input is passed through untouched when the caller asked for it.
    $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);

    return $is_kept_ascii
        ? $char
        : '&#' . self::ord($char, $encoding) . ';';
}
5453
|
|
|
|
5454
|
|
|
/**
 * Replaces every run of $tab_length consecutive spaces in $str with one tab character.
 *
 * @param string $str        <p>The input string.</p>
 * @param int    $tab_length <p>Number of spaces that make up one tab. Default: 4</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with space runs converted to tabs.</p>
 */
public static function spaces_to_tabs(string $str, int $tab_length = 4): string
{
    // Always build the run from $tab_length instead of relying on hard-coded
    // literals, whose whitespace is easy to corrupt without anyone noticing.
    $tab = \str_repeat(' ', $tab_length);

    return \str_replace($tab, "\t", $str);
}
5474
|
|
|
|
5475
|
|
|
/**
 * Returns a camelCase version of the string. Trims surrounding spaces,
 * capitalizes letters following digits, spaces, dashes and underscores,
 * and removes spaces, dashes, as well as underscores.
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_camelize(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($clean_utf8) {
        $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = self::lcfirst(
        \trim($str),
        $encoding,
        false,
        $lang,
        $try_to_keep_the_string_length
    );
    // drop leading dashes / underscores so they do not capitalize the first letter
    $str = (string) \preg_replace('/^[-_]+/', '', $str);

    // the native mb_* functions are only used when no language-specific handling is requested
    $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

    /**
     * Upper-cases one matched chunk with the same strategy for both passes below.
     *
     * @param string $chunk
     * @param bool   $clean
     *
     * @psalm-pure
     *
     * @return string
     */
    $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
        if (!$use_mb_functions) {
            return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
        }

        return $encoding === 'UTF-8'
            ? \mb_strtoupper($chunk)
            : \mb_strtoupper($chunk, $encoding);
    };

    // pass 1: remove separators and upper-case the character that follows them
    $str = (string) \preg_replace_callback(
        '/[-_\\s]+(.)?/u',
        /**
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $match) use ($to_upper): string {
            return isset($match[1]) ? $to_upper($match[1], false) : '';
        },
        $str
    );

    // pass 2: upper-case the character that follows a run of digits
    return (string) \preg_replace_callback(
        '/[\\p{N}]+(.)?/u',
        /**
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $match) use ($to_upper, $clean_utf8): string {
            return $to_upper($match[0], $clean_utf8);
        },
        $str
    );
}
5568
|
|
|
|
5569
|
|
|
/**
 * Returns the string with the first letter of each word capitalized,
 * except for when the word is a name which shouldn't be capitalized.
 *
 * Whitespace is collapsed first; the name helper is then applied once with
 * a space and once with a dash as the word delimiter.
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with $str capitalized.</p>
 */
public static function str_capitalize_name(string $str): string
{
    $collapsed = self::collapse_whitespace($str);
    $by_space = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($by_space, '-');
}
5590
|
|
|
|
5591
|
|
|
/**
 * Returns true if the string contains $needle, false otherwise. By default
 * the comparison is case-sensitive, but can be made insensitive by setting
 * $case_sensitive to false.
 *
 * @param string $haystack       <p>The input string.</p>
 * @param string $needle         <p>Substring to look for.</p>
 * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $haystack contains $needle.</p>
 */
public static function str_contains(
    string $haystack,
    string $needle,
    bool $case_sensitive = true
): bool {
    if (!$case_sensitive) {
        return \mb_stripos($haystack, $needle) !== false;
    }

    if (\PHP_VERSION_ID >= 80000) {
        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }

    // fallback for PHP < 8
    return \strpos($haystack, $needle) !== false;
}
5621
|
|
|
|
5622
|
|
|
/**
 * Returns true if the string contains all $needles, false otherwise. By
 * default the comparison is case-sensitive, but can be made insensitive by
 * setting $case_sensitive to false.
 *
 * An empty haystack, an empty needle list or an empty needle yields false.
 *
 * @param string $haystack       <p>The input string.</p>
 * @param array  $needles        <p>SubStrings to look for.</p>
 * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $haystack contains all $needles.</p>
 */
public static function str_contains_all(
    string $haystack,
    array $needles,
    bool $case_sensitive = true
): bool {
    if ($haystack === '' || $needles === []) {
        return false;
    }

    foreach ($needles as $needle) {
        $needle = (string) $needle;

        // Only a truly empty needle is rejected; a truthiness test would also reject "0".
        if ($needle === '') {
            return false;
        }

        // One lookup per needle: the case-sensitive path no longer falls through
        // into a second, redundant case-insensitive search.
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        if ($position === false) {
            return false;
        }
    }

    return true;
}
5659
|
|
|
|
5660
|
|
|
/**
 * Returns true if the string contains any $needles, false otherwise. By
 * default the comparison is case-sensitive, but can be made insensitive by
 * setting $case_sensitive to false.
 *
 * Empty needles are ignored; an empty haystack or needle list yields false.
 *
 * @param string $haystack       <p>The input string.</p>
 * @param array  $needles        <p>SubStrings to look for.</p>
 * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $haystack contains any of the $needles.</p>
 */
public static function str_contains_any(
    string $haystack,
    array $needles,
    bool $case_sensitive = true
): bool {
    if ($haystack === '' || $needles === []) {
        return false;
    }

    foreach ($needles as $needle) {
        $needle = (string) $needle;

        // Only a truly empty needle is skipped; a truthiness test would also skip "0".
        if ($needle === '') {
            continue;
        }

        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        if ($position !== false) {
            return true;
        }
    }

    return false;
}
5703
|
|
|
|
5704
|
|
|
/**
 * Returns a lowercase and trimmed string separated by dashes. Dashes are
 * inserted before uppercase characters (with the exception of the first
 * character of the string), and in place of spaces as well as underscores.
 *
 * This is str_delimit() with "-" as the delimiter.
 *
 * @param string $str      <p>The input string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
{
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
}
5720
|
|
|
|
5721
|
|
|
/**
 * Returns a lowercase and trimmed string separated by the given delimiter.
 * Delimiters are inserted before uppercase characters (with the exception
 * of the first character of the string), and in place of spaces, dashes,
 * and underscores. Alpha delimiters are not converted to lowercase.
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
 *                                                   ß</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_delimit(
    string $str,
    string $delimiter,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    $has_mbstring = self::$SUPPORT['mbstring'] === true;
    $trimmed = \trim($str);

    // step 1: put a dash in front of every uppercase letter that is not at a word boundary
    if ($has_mbstring) {
        $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
    } else {
        $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
    }

    // step 2: lowercase; the native function is only used for plain UTF-8 without language rules
    $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
    if ($use_mb_functions && $encoding === 'UTF-8') {
        $str = \mb_strtolower($str);
    } else {
        $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }

    // step 3: collapse runs of dashes, underscores and whitespace into the delimiter
    if ($has_mbstring) {
        return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
    }

    return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
}
5772
|
|
|
|
5773
|
|
|
/**
 * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
 *
 * EXAMPLE: <code>
 * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
 * UTF8::str_detect_encoding('Abc'); // 'ASCII'
 * </code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>
 *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
 *                      otherwise it will return false e.g. for BINARY or not detected encoding.
 *                      </p>
 */
public static function str_detect_encoding($str)
{
    $str = (string) $str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    $strict_binary_check = !self::string_has_bom($str);
    if (self::is_binary($str, $strict_binary_check)) {
        $utf32_kind = self::is_utf32($str, false);
        if ($utf32_kind === 1) {
            return 'UTF-32LE';
        }
        if ($utf32_kind === 2) {
            return 'UTF-32BE';
        }

        $utf16_kind = self::is_utf16($str, false);
        if ($utf16_kind === 1) {
            return 'UTF-16LE';
        }
        if ($utf16_kind === 2) {
            return 'UTF-16BE';
        }

        // is binary but not "UTF-16" or "UTF-32"
        return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (ASCII::is_ascii($str)) {
        return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8_string($str)) {
        return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    if (self::$SUPPORT['mbstring'] === true) {
        // candidates are tried in exactly this order
        $candidate_encodings = [
            'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3', 'ISO-8859-4',
            'ISO-8859-5', 'ISO-8859-6', 'ISO-8859-7', 'ISO-8859-8',
            'ISO-8859-9', 'ISO-8859-10', 'ISO-8859-13', 'ISO-8859-14',
            'ISO-8859-15', 'ISO-8859-16',
            'WINDOWS-1251', 'WINDOWS-1252', 'WINDOWS-1254',
            'CP932', 'CP936', 'CP950', 'CP866', 'CP850',
            'CP51932', 'CP50220', 'CP50221', 'CP50222',
            'ISO-2022-JP', 'ISO-2022-KR',
            'JIS', 'JIS-ms',
            'EUC-CN', 'EUC-JP',
        ];

        // info: do not use the symfony polyfill here
        $detected = \mb_detect_encoding($str, $candidate_encodings, true);
        if ($detected) {
            return $detected;
        }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
        self::$ENCODINGS = self::getData('encodings');
    }

    foreach (self::$ENCODINGS as $candidate) {
        // INFO: //IGNORE but still throw notice
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
        if ($round_trip === $str) {
            return $candidate;
        }
    }

    return false;
}
5903
|
|
|
|
5904
|
|
|
/**
 * Check if the string ends with the given substring.
 *
 * EXAMPLE: <code>
 * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
 * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function str_ends_with(string $haystack, string $needle): bool
{
    // every string ends with the empty string
    if ($needle === '') {
        return true;
    }

    if ($haystack === '') {
        return false;
    }

    if (\PHP_VERSION_ID < 80000) {
        // byte-wise comparison of the tail for PHP < 8
        $tail = \substr($haystack, -\strlen($needle));

        return $tail === $needle;
    }

    /** @phpstan-ignore-next-line - only for PHP8 */
    return \str_ends_with($haystack, $needle);
}
5936
|
|
|
|
5937
|
|
|
/**
 * Returns true if the string ends with any of $substrings, false otherwise.
 *
 * - case-sensitive
 * - an empty substring always matches, as in str_ends_with()
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $substrings <p>Substrings to look for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str ends with one of the $substrings.</p>
 */
public static function str_ends_with_any(string $str, array $substrings): bool
{
    if ($substrings === []) {
        return false;
    }

    foreach ($substrings as $substring) {
        // substr($str, -0) returns the whole string, so the empty suffix needs
        // its own check to agree with str_ends_with() and str_iends_with_any().
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) === $substring
        ) {
            return true;
        }
    }

    return false;
}
5964
|
|
|
|
5965
|
|
|
/**
 * Ensures that the string begins with $substring. If it doesn't, it's
 * prepended.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The substring to add if not present.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_ensure_left(string $str, string $substring): string
{
    // byte-wise prefix comparison
    $has_prefix = $substring !== ''
                  && \strncmp($str, $substring, \strlen($substring)) === 0;

    return $has_prefix ? $str : $substring . $str;
}
5988
|
|
|
|
5989
|
|
|
/**
 * Ensures that the string ends with $substring. If it doesn't, it's appended.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The substring to add if not present.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_ensure_right(string $str, string $substring): string
{
    // byte-wise suffix comparison; empty operands never count as "already present"
    $has_suffix = $str !== ''
                  && $substring !== ''
                  && \substr($str, -\strlen($substring)) === $substring;

    return $has_suffix ? $str : $str . $substring;
}
6013
|
|
|
|
6014
|
|
|
/**
 * Capitalizes the first word of the string, replaces underscores with
 * spaces, and strips '_id'.
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_humanize($str): string
{
    // same order as the array form of str_replace(): strip "_id" first, then turn "_" into a space
    $without_id = \str_replace('_id', '', $str);
    $spaced = \str_replace('_', ' ', $without_id);

    return self::ucfirst(\trim($spaced));
}
6040
|
|
|
|
6041
|
|
|
/**
 * Check if the string ends with the given substring, case-insensitive.
 *
 * EXAMPLE: <code>
 * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
 * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function str_iends_with(string $haystack, string $needle): bool
{
    // every string ends with the empty string
    if ($needle === '') {
        return true;
    }

    if ($haystack === '') {
        return false;
    }

    // take as many trailing bytes as the needle has and compare them ignoring case
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
}
6068
|
|
|
|
6069
|
|
|
/**
 * Returns true if the string ends with any of $substrings, false otherwise.
 *
 * - case-insensitive
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $substrings <p>Substrings to look for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str ends with one of the $substrings.</p>
 */
public static function str_iends_with_any(string $str, array $substrings): bool
{
    // an empty list simply never enters the loop
    foreach ($substrings as $candidate) {
        if (self::str_iends_with($str, $candidate)) {
            return true;
        }
    }

    return false;
}
6096
|
|
|
|
6097
|
|
|
/**
 * Inserts $substring into the string at the $index provided.
 *
 * If $index is greater than the length of $str, the string is returned unchanged.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>String to be inserted.</p>
 * @param int    $index     <p>The index at which to insert the substring.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_insert(
    string $str,
    string $substring,
    int $index,
    string $encoding = 'UTF-8'
): string {
    if ($encoding === 'UTF-8') {
        // fast path: native mbstring calls
        $length = (int) \mb_strlen($str);
        if ($index > $length) {
            return $str;
        }

        /** @noinspection UnnecessaryCastingInspection */
        $head = (string) \mb_substr($str, 0, $index);
        /** @noinspection UnnecessaryCastingInspection */
        $tail = (string) \mb_substr($str, $index, $length);

        return $head . $substring . $tail;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    $length = (int) self::strlen($str, $encoding);
    if ($index > $length) {
        return $str;
    }

    $head = (string) self::substr($str, 0, $index, $encoding);
    $tail = (string) self::substr($str, $index, $length, $encoding);

    return $head . $substring . $tail;
}
6138
|
|
|
|
6139
|
|
|
/**
 * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
 *
 * EXAMPLE: <code>
 * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
 * </code>
 *
 * Each search string is quoted and turned into a case-insensitive unicode
 * regex, the actual work is then done by "preg_replace()". An empty search
 * string is mapped to a pattern that can never match.
 *
 * @see http://php.net/manual/en/function.str-ireplace.php
 *
 * @param string|string[] $search      <p>
 *                                     Every replacement with search array is
 *                                     performed on the result of previous replacement.
 *                                     </p>
 * @param string|string[] $replacement <p>The replacement.</p>
 * @param string|string[] $subject     <p>
 *                                     If subject is an array, then the search and
 *                                     replace is performed with every entry of
 *                                     subject, and the return value is an array as
 *                                     well.
 *                                     </p>
 * @param int             $count       [optional] <p>
 *                                     The number of matched and replaced needles will
 *                                     be returned in count which is passed by
 *                                     reference.
 *                                     </p>
 *
 * @psalm-pure
 *
 * @return string|string[]
 *                         <p>A string or an array of replacements.</p>
 *
 * @template TStrIReplaceSubject
 * @phpstan-param TStrIReplaceSubject $subject
 * @phpstan-return TStrIReplaceSubject
 */
public static function str_ireplace($search, $replacement, $subject, &$count = null)
{
    // Translate every needle into its regex counterpart (keys and order are kept).
    $patterns = \array_map(
        static function ($needle): string {
            $needle = (string) $needle;

            // an empty needle must not match anything: "^" plus a look-behind can never succeed
            return $needle === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($needle, '/') . '/ui';
        },
        (array) $search
    );

    // fallback
    /** @phpstan-ignore-next-line - only a fallback for PHP8 */
    if ($replacement === null) {
        $replacement = '';
    }
    /** @phpstan-ignore-next-line - only a fallback for PHP8 */
    if ($subject === null) {
        $subject = '';
    }

    /**
     * @psalm-suppress PossiblyNullArgument
     * @phpstan-var TStrIReplaceSubject $subject
     */
    $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

    return $subject;
}
6206
|
|
|
|
6207
|
|
|
/**
 * Replaces $search from the beginning of string with $replacement.
 *
 * - the comparison is done case-insensitive via "strncasecmp()"
 * - an empty $search results in $replacement being appended to $str
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string after the replacement.</p>
 */
public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
{
    // Nothing to look for: the replacement is simply attached to the input
    // (which also covers an empty input string).
    if ($search === '') {
        return $str . $replacement;
    }

    $prefix_length = \strlen($search);

    // An empty or too short $str can never compare equal here and is returned as-is.
    return \strncasecmp($str, $search, $prefix_length) === 0
        ? $replacement . \substr($str, $prefix_length)
        : $str;
}
6242
|
|
|
|
6243
|
|
|
/**
 * Replaces $search from the ending of string with $replacement.
 *
 * - the comparison is done case-insensitive via "strcasecmp()"
 * - an empty $search results in $replacement being appended to $str
 * - a $search that is longer than $str can never match, $str is returned unchanged
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string after the replacement.</p>
 */
public static function str_ireplace_ending(string $str, string $search, string $replacement): string
{
    // Nothing to look for: the replacement is simply attached to the input
    // (which also covers an empty input string).
    if ($search === '') {
        return $str . $replacement;
    }

    // Byte position at which the ending would have to start.
    $tail_offset = \strlen($str) - \strlen($search);

    // The search string is longer than the input: bail out instead of handing a
    // negative, out-of-range offset to a string function (ValueError on PHP 8).
    if ($tail_offset < 0) {
        return $str;
    }

    // The tail has exactly the length of $search, so one comparison is enough.
    if (\strcasecmp(\substr($str, $tail_offset), $search) === 0) {
        return \substr($str, 0, $tail_offset) . $replacement;
    }

    return $str;
}
6277
|
|
|
|
6278
|
|
|
/**
 * Check if the string starts with the given substring, case-insensitive.
 *
 * EXAMPLE: <code>
 * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
 * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
 * </code>
 *
 * An empty $needle always matches, an empty $haystack only matches the empty $needle.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function str_istarts_with(string $haystack, string $needle): bool
{
    // Every string - even the empty one - starts with the empty string.
    if ($needle === '') {
        return true;
    }

    // A prefix match means that the first occurrence is found at position 0.
    return $haystack !== '' && self::stripos($haystack, $needle) === 0;
}
6305
|
|
|
|
6306
|
|
|
/**
 * Returns true if the string begins with any of $substrings, false otherwise.
 *
 * - case-insensitive
 *
 * An empty input string or an empty list of substrings always yields false.
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $substrings <p>Substrings to look for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str starts with $substring.</p>
 */
public static function str_istarts_with_any(string $str, array $substrings): bool
{
    if ($str === '') {
        return false;
    }

    // The candidates are only read, so they are iterated by value: a by-reference
    // loop would force a separation of the array and leave a dangling reference.
    foreach ($substrings as $substring) {
        if (self::str_istarts_with($str, $substring)) {
            return true;
        }
    }

    // Also reached for an empty $substrings array.
    return false;
}
6337
|
|
|
|
6338
|
|
|
/**
 * Gets the substring after the first occurrence of a separator.
 *
 * - the separator is searched case-insensitive
 * - an empty string is returned if $str or $separator is empty, or if the
 *   separator was not found
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_after_first_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($separator === '' || $str === '') {
        return '';
    }

    // Search with the same encoding that is used for slicing below, otherwise
    // the character offset does not fit the string for non UTF-8 input.
    $offset = self::stripos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            $offset + (int) \mb_strlen($separator)
        );
    }

    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
6377
|
|
|
|
6378
|
|
|
/**
 * Gets the substring after the last occurrence of a separator.
 *
 * - the separator is searched case-insensitive
 * - an empty string is returned if $str or $separator is empty, or if the
 *   separator was not found
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_after_last_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($separator === '' || $str === '') {
        return '';
    }

    // Search with the same encoding that is used for slicing below, otherwise
    // the character offset does not fit the string for non UTF-8 input.
    $offset = self::strripos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            $offset + (int) self::strlen($separator)
        );
    }

    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
6417
|
|
|
|
6418
|
|
|
/**
 * Gets the substring before the first occurrence of a separator.
 *
 * - the separator is searched case-insensitive
 * - an empty string is returned if $str or $separator is empty, or if the
 *   separator was not found
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_before_first_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($separator === '' || $str === '') {
        return '';
    }

    // Search with the same encoding that is used for slicing below, otherwise
    // the character offset does not fit the string for non UTF-8 input.
    $offset = self::stripos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr($str, 0, $offset);
    }

    return (string) self::substr($str, 0, $offset, $encoding);
}
6449
|
|
|
|
6450
|
|
|
/**
 * Gets the substring before the last occurrence of a separator.
 *
 * - the separator is searched case-insensitive
 * - an empty string is returned if $str or $separator is empty, or if the
 *   separator was not found
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_before_last_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($separator === '' || $str === '') {
        return '';
    }

    // The default encoding talks to "mb_*" directly, everything else uses the class helpers.
    $is_utf8 = $encoding === 'UTF-8';

    $position = $is_utf8
        ? \mb_strripos($str, $separator)
        : self::strripos($str, $separator, 0, $encoding);

    if ($position === false) {
        return '';
    }

    return $is_utf8
        ? (string) \mb_substr($str, 0, $position)
        : (string) self::substr($str, 0, $position, $encoding);
}
6486
|
|
|
|
6487
|
|
|
/**
 * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
 *
 * The lookup is delegated to "UTF8::stristr()"; an empty string is returned
 * if $str or $needle is empty, or if that lookup reports a failure.
 *
 * @param string $str           <p>The input string.</p>
 * @param string $needle        <p>The string to look for.</p>
 * @param bool   $before_needle [optional] <p>Default: false</p>
 * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_first(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($needle === '' || $str === '') {
        return '';
    }

    $result = self::stristr($str, $needle, $before_needle, $encoding);

    // "false" signals "not found" and is normalized to an empty string
    return $result === false ? '' : $result;
}
6525
|
|
|
|
6526
|
|
|
/**
 * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
 *
 * The lookup is delegated to "UTF8::strrichr()"; an empty string is returned
 * if $str or $needle is empty, or if that lookup reports a failure.
 *
 * @param string $str           <p>The input string.</p>
 * @param string $needle        <p>The string to look for.</p>
 * @param bool   $before_needle [optional] <p>Default: false</p>
 * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_last(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($needle === '' || $str === '') {
        return '';
    }

    $result = self::strrichr($str, $needle, $before_needle, $encoding);

    // "false" signals "not found" and is normalized to an empty string
    return $result === false ? '' : $result;
}
6564
|
|
|
|
6565
|
|
|
/**
 * Returns the last $n characters of the string.
 *
 * An empty string is returned for an empty input or if $n is not positive.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $n        <p>Number of characters to retrieve from the end.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_last_char(
    string $str,
    int $n = 1,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $n <= 0) {
        return '';
    }

    // A negative start offset counts from the end of the string.
    $start = -$n;

    if ($encoding !== 'UTF-8') {
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, $start, null, $normalized_encoding);
    }

    return (string) \mb_substr($str, $start);
}
6593
|
|
|
|
6594
|
|
|
/**
 * Limit the number of characters in a string.
 *
 * - a string that already fits into $length is returned unchanged
 * - otherwise the string is cut so that the kept part plus $str_add_on
 *   has $length characters
 * - if $str_add_on alone is already longer than $length, nothing of the
 *   input is kept and only $str_add_on is returned
 *
 * @param string $str        <p>The input string.</p>
 * @param int    $length     [optional] <p>Default: 100</p>
 * @param string $str_add_on [optional] <p>Default: …</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_limit(
    string $str,
    int $length = 100,
    string $str_add_on = '…',
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $length <= 0) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // Never pass a negative length to "mb_substr()": it would only trim a few
        // characters from the end and make the result longer than the limit.
        $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

        return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ((int) self::strlen($str, $encoding) <= $length) {
        return $str;
    }

    // same clamping as in the UTF-8 branch above
    $keep = \max(0, $length - (int) self::strlen($str_add_on));

    return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
}
6633
|
|
|
|
6634
|
|
|
/**
 * Limit the number of characters in a string, but also after the next word.
 *
 * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
 *
 * - a string that already fits into $length is returned unchanged
 * - if the character at the limit is a space, the string is cut right before it
 * - otherwise the string is cut at the limit and the last (incomplete) word is dropped;
 *   if no complete word is left, the string is cut one character before the limit
 *
 * @param string $str        <p>The input string.</p>
 * @param int    $length     [optional] <p>Default: 100</p>
 * @param string $str_add_on [optional] <p>Default: …</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_limit_after_word(
    string $str,
    int $length = 100,
    string $str_add_on = '…',
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $length <= 0) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // the limit falls exactly on a space: cut in front of it
        if (\mb_substr($str, $length - 1, 1) === ' ') {
            return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
        }

        $truncated = \mb_substr($str, 0, $length);

        // a negative limit makes "explode()" drop the last (cut-off) word
        $complete_words = \implode(' ', \explode(' ', $truncated, -1));

        if ($complete_words === '') {
            return ((string) \mb_substr($truncated, 0, $length - 1)) . $str_add_on;
        }

        return $complete_words . $str_add_on;
    }

    if ((int) self::strlen($str, $encoding) <= $length) {
        return $str;
    }

    // the limit falls exactly on a space: cut in front of it
    if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
        return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
    }

    $truncated = self::substr($str, 0, $length, $encoding);
    if ($truncated === false) {
        return $str_add_on;
    }

    // a negative limit makes "explode()" drop the last (cut-off) word
    $complete_words = \implode(' ', \explode(' ', $truncated, -1));

    if ($complete_words === '') {
        return ((string) self::substr($truncated, 0, $length - 1, $encoding)) . $str_add_on;
    }

    return $complete_words . $str_add_on;
}
6700
|
|
|
|
6701
|
|
|
/**
 * Returns the longest common prefix between the $str1 and $str2.
 *
 * Both strings are compared character by character from the start until the
 * first difference (or the end of the shorter string) is reached.
 *
 * @param string $str1     <p>The input string.</p>
 * @param string $str2     <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_longest_common_prefix(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): string {
    $prefix = '';

    if ($encoding === 'UTF-8') {
        // only the length of the shorter string needs to be inspected
        $limit = (int) \min(
            \mb_strlen($str1),
            \mb_strlen($str2)
        );

        $position = 0;
        while ($position < $limit) {
            $char = \mb_substr($str1, $position, 1);

            if ($char === false || $char !== \mb_substr($str2, $position, 1)) {
                break;
            }

            $prefix .= $char;
            ++$position;
        }

        return $prefix;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    // only the length of the shorter string needs to be inspected
    $limit = (int) \min(
        self::strlen($str1, $encoding),
        self::strlen($str2, $encoding)
    );

    $position = 0;
    while ($position < $limit) {
        $char = self::substr($str1, $position, 1, $encoding);

        if ($char === false || $char !== self::substr($str2, $position, 1, $encoding)) {
            break;
        }

        $prefix .= $char;
        ++$position;
    }

    return $prefix;
}
6764
|
|
|
|
6765
|
|
|
/**
 * Returns the longest common substring between the $str1 and $str2.
 * In the case of ties, it returns that which occurs first.
 *
 * Uses dynamic programming to solve
 * http://en.wikipedia.org/wiki/Longest_common_substring_problem
 *
 * @param string $str1
 * @param string $str2     <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with its $str being the longest common substring.</p>
 */
public static function str_longest_common_substring(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): string {
    if ($str1 === '' || $str2 === '') {
        return '';
    }

    if ($encoding === 'UTF-8') {
        $str_length = (int) \mb_strlen($str1);
        $other_length = (int) \mb_strlen($str2);
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $str_length = (int) self::strlen($str1, $encoding);
        $other_length = (int) self::strlen($str2, $encoding);
    }

    // Return if either string is empty
    if ($str_length === 0 || $other_length === 0) {
        return '';
    }

    // evaluated after the normalization, so that e.g. an alias of UTF-8 uses "mb_*" as well
    $is_utf8 = $encoding === 'UTF-8';

    // Extract the characters of both strings only once, instead of calling
    // the (linear) substr functions again inside of the nested loop below.
    $str_chars = [];
    for ($i = 0; $i < $str_length; ++$i) {
        $str_chars[] = $is_utf8
            ? \mb_substr($str1, $i, 1)
            : self::substr($str1, $i, 1, $encoding);
    }

    $other_chars = [];
    for ($j = 0; $j < $other_length; ++$j) {
        $other_chars[] = $is_utf8
            ? \mb_substr($str2, $j, 1)
            : self::substr($str2, $j, 1, $encoding);
    }

    // $len: length of the best match so far, $end: (1-based) end of that match in $str1
    $len = 0;
    $end = 0;

    // Only the previous row of the table is ever read, so two rows are sufficient.
    $previous_row = \array_fill(0, $other_length + 1, 0);

    for ($i = 1; $i <= $str_length; ++$i) {
        $str_char = $str_chars[$i - 1];
        $current_row = [0];

        for ($j = 1; $j <= $other_length; ++$j) {
            if ($str_char === $other_chars[$j - 1]) {
                $run = $previous_row[$j - 1] + 1;
                $current_row[$j] = $run;

                // strictly greater: in the case of ties the first match wins
                if ($run > $len) {
                    $len = $run;
                    $end = $i;
                }
            } else {
                $current_row[$j] = 0;
            }
        }

        $previous_row = $current_row;
    }

    if ($is_utf8) {
        return (string) \mb_substr($str1, $end - $len, $len);
    }

    return (string) self::substr($str1, $end - $len, $len, $encoding);
}
6855
|
|
|
|
6856
|
|
|
/**
 * Returns the longest common suffix between the $str1 and $str2.
 *
 * Both strings are compared character by character from the end until the
 * first difference (or the start of the shorter string) is reached.
 *
 * @param string $str1
 * @param string $str2     <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_longest_common_suffix(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): string {
    if ($str1 === '' || $str2 === '') {
        return '';
    }

    $suffix = '';

    if ($encoding === 'UTF-8') {
        // only the length of the shorter string needs to be inspected
        $limit = (int) \min(
            \mb_strlen($str1, $encoding),
            \mb_strlen($str2, $encoding)
        );

        // $from_end: distance of the inspected character from the end of the strings
        $from_end = 1;
        while ($from_end <= $limit) {
            $char = \mb_substr($str1, -$from_end, 1);

            if ($char === false || $char !== \mb_substr($str2, -$from_end, 1)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$from_end;
        }

        return $suffix;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    // only the length of the shorter string needs to be inspected
    $limit = (int) \min(
        self::strlen($str1, $encoding),
        self::strlen($str2, $encoding)
    );

    // $from_end: distance of the inspected character from the end of the strings
    $from_end = 1;
    while ($from_end <= $limit) {
        $char = self::substr($str1, -$from_end, 1, $encoding);

        if ($char === false || $char !== self::substr($str2, -$from_end, 1, $encoding)) {
            break;
        }

        $suffix = $char . $suffix;
        ++$from_end;
    }

    return $suffix;
}
6922
|
|
|
|
6923
|
|
|
/**
 * Returns true if $str matches the supplied pattern, false otherwise.
 *
 * The pattern is wrapped into "/" delimiters and evaluated with the "u"
 * (unicode) modifier, so it must be given without delimiters.
 *
 * @param string $str     <p>The input string.</p>
 * @param string $pattern <p>Regex pattern to match against.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str matches the pattern.</p>
 */
public static function str_matches_pattern(string $str, string $pattern): bool
{
    $regex = '/' . $pattern . '/u';

    // "preg_match()" gives 1 for a match, 0 for no match and false on failure
    return \preg_match($regex, $str) === 1;
}
6938
|
|
|
|
6939
|
|
|
/**
 * Returns whether or not a character exists at an index. Offsets may be
 * negative to count from the last character in the string. Implements
 * part of the ArrayAccess interface.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $offset   <p>The index to check.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not the index exists.</p>
 */
public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
{
    $char_count = (int) self::strlen($str, $encoding);

    // positive offsets are zero-based, negative offsets start with -1 at the last character
    return $offset >= 0
        ? $char_count > $offset
        : $char_count >= \abs($offset);
}
6964
|
|
|
|
6965
|
|
|
/**
 * Returns the character at the given index. Offsets may be negative to
 * count from the last character in the string. Implements part of the
 * ArrayAccess interface, and throws an OutOfBoundsException if the index
 * does not exist.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @throws \OutOfBoundsException if the positive or negative offset does not exist
 *
 * @return string
 *                <p>The character at the specified index.</p>
 *
 * @psalm-pure
 */
public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
{
    // Reuse the bounds check of "str_offset_exists()", so that the length is
    // measured with the very same $encoding that is used for reading the char.
    if (!self::str_offset_exists($str, $index, $encoding)) {
        throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
}
6997
|
|
|
|
6998
|
|
|
/**
 * Pad a UTF-8 string to a given length with another string.
 *
 * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
 *
 * @param string     $str        <p>The input string.</p>
 * @param int        $pad_length <p>The length of return string.</p>
 * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
 * @param int|string $pad_type   [optional] <p>
 *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
 *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
 *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
 *                               </p>
 * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
 *
 * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right' or 'both'
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Returns the padded string; the input is returned unchanged if it is
 *                already longer than $pad_length, if $pad_length is 0 or if $pad_string is empty.</p>
 */
public static function str_pad(
    string $str,
    int $pad_length,
    string $pad_string = ' ',
    $pad_type = \STR_PAD_RIGHT,
    string $encoding = 'UTF-8'
): string {
    if ($pad_length === 0 || $pad_string === '') {
        return $str;
    }

    // translate the string aliases into the native "STR_PAD_*" constants
    if ($pad_type !== (int) $pad_type) {
        $aliases = [
            'left'  => \STR_PAD_LEFT,
            'right' => \STR_PAD_RIGHT,
            'both'  => \STR_PAD_BOTH,
        ];

        if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
            throw new \InvalidArgumentException(
                'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
            );
        }

        $pad_type = $aliases[$pad_type];
    }

    // the plain "mb_*" functions are used for UTF-8, the class helpers for everything else
    $is_utf8 = $encoding === 'UTF-8';
    if (!$is_utf8) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // number of characters of "$value" in the target encoding
    $count_chars = static function (string $value) use ($is_utf8, $encoding): int {
        return $is_utf8
            ? (int) \mb_strlen($value)
            : (int) self::strlen($value, $encoding);
    };

    // repeat the pad string "$repeat" times and cut the result down to "$chars" characters
    $make_padding = static function (int $repeat, int $chars) use ($pad_string, $is_utf8, $encoding): string {
        $repeated = \str_repeat($pad_string, $repeat);

        return $is_utf8
            ? (string) \mb_substr($repeated, 0, $chars)
            : (string) self::substr($repeated, 0, $chars, $encoding);
    };

    $str_length = $count_chars($str);
    if ($pad_length < $str_length) {
        return $str;
    }

    $diff = $pad_length - $str_length;

    if ($pad_type === \STR_PAD_BOTH) {
        // an odd difference puts the extra character on the right side
        $left = (int) \floor($diff / 2);
        $right = (int) \ceil($diff / 2);

        return $make_padding($left, $left) . $str . $make_padding($right, $right);
    }

    $padding = $make_padding((int) \ceil($diff / $count_chars($pad_string)), $diff);

    // every other integer value is handled like "STR_PAD_RIGHT"
    return $pad_type === \STR_PAD_LEFT
        ? $padding . $str
        : $str . $padding;
}
7165
|
|
|
|
7166
|
|
|
/**
 * Pads both sides of the string until it has the given length.
 * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_BOTH.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with padding applied.</p>
 */
public static function str_pad_both(
    string $str,
    int $length,
    string $pad_str = ' ',
    string $encoding = 'UTF-8'
): string {
    return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
}
7194
|
|
|
|
7195
|
|
|
/**
 * Pads the beginning of the string until it has the given length.
 * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_LEFT.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with left padding.</p>
 */
public static function str_pad_left(
    string $str,
    int $length,
    string $pad_str = ' ',
    string $encoding = 'UTF-8'
): string {
    return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
}
7223
|
|
|
|
7224
|
|
|
/**
 * Pads the end of the string until it has the given length.
 * Shortcut for "UTF8::str_pad()" with a $pad_type of STR_PAD_RIGHT.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with right padding.</p>
 */
public static function str_pad_right(
    string $str,
    int $length,
    string $pad_str = ' ',
    string $encoding = 'UTF-8'
): string {
    return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
}
7252
|
|
|
|
7253
|
|
|
/**
 * Repeat a string.
 *
 * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
 *
 * @param string $str        <p>
 *                           The string to be repeated; it is passed through "UTF8::filter()" first.
 *                           </p>
 * @param int    $multiplier <p>
 *                           Number of times the input string should be repeated.
 *                           </p>
 *                           <p>
 *                           The multiplier has to be greater than or equal to 0.
 *                           If the multiplier is set to 0, the function
 *                           will return an empty string.
 *                           </p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The repeated string.</p>
 */
public static function str_repeat(string $str, int $multiplier): string
{
    return \str_repeat(self::filter($str), $multiplier);
}
7282
|
|
|
|
7283
|
|
|
/**
 * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
 *
 * Replace all occurrences of the search string with the replacement string.
 *
 * @see http://php.net/manual/en/function.str-replace.php
 *
 * @param string|string[] $search  <p>
 *                                 The value being searched for, otherwise known as the needle.
 *                                 An array may be used to designate multiple needles.
 *                                 </p>
 * @param string|string[] $replace <p>
 *                                 The replacement value that replaces found search
 *                                 values. An array may be used to designate multiple replacements.
 *                                 </p>
 * @param string|string[] $subject <p>
 *                                 The string or array of strings being searched and replaced on,
 *                                 otherwise known as the haystack.
 *                                 </p>
 *                                 <p>
 *                                 If subject is an array, then the search and
 *                                 replace is performed with every entry of
 *                                 subject, and the return value is an array as
 *                                 well.
 *                                 </p>
 * @param int|null        $count   [optional] <p>
 *                                 If passed, this will hold the number of matched and replaced needles.
 *                                 </p>
 *
 * @psalm-pure
 *
 * @return string|string[]
 *                         <p>This function returns a string or an array with the replaced values.</p>
 *
 * @template TStrReplaceSubject
 * @phpstan-param TStrReplaceSubject $subject
 * @phpstan-return TStrReplaceSubject
 *
 * @deprecated please use \str_replace() instead
 */
public static function str_replace(
    $search,
    $replace,
    $subject,
    int &$count = null
) {
    /**
     * @psalm-suppress PossiblyNullArgument
     * @phpstan-var TStrReplaceSubject $replaced
     */
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
}
7342
|
|
|
|
7343
|
|
|
/**
 * Replaces $search at the beginning of the string with $replacement.
 *
 * The comparison is done on bytes and is case-sensitive. If $str does not
 * start with $search, $str is returned unchanged.
 *
 * NOTE: an empty $search never matches a prefix; in that case the
 * replacement is appended to $str (an empty $str simply yields $replacement).
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string after the replacements.</p>
 */
public static function str_replace_beginning(
    string $str,
    string $search,
    string $replacement
): string {
    // covers every "empty needle" case, because '' . $replacement === $replacement
    if ($search === '') {
        return $str . $replacement;
    }

    $prefix_bytes = \strlen($search);
    if (\strncmp($str, $search, $prefix_bytes) !== 0) {
        return $str;
    }

    return $replacement . \substr($str, $prefix_bytes);
}
7381
|
|
|
|
7382
|
|
|
/**
 * Replaces $search at the ending of the string with $replacement.
 *
 * The comparison is done on bytes and is case-sensitive. If $str does not
 * end with $search (including the case that $search is longer than $str),
 * $str is returned unchanged.
 *
 * NOTE: an empty $search never matches a suffix; in that case the
 * replacement is appended to $str (an empty $str simply yields $replacement).
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string after the replacements.</p>
 */
public static function str_replace_ending(
    string $str,
    string $search,
    string $replacement
): string {
    // covers every "empty needle" case, because '' . $replacement === $replacement
    if ($search === '') {
        return $str . $replacement;
    }

    $suffix_bytes = \strlen($search);

    // A needle that is longer than the haystack can never be its suffix. Checking
    // this first avoids an out-of-range negative offset, which makes "strpos()"
    // throw a ValueError on PHP >= 8.
    if ($suffix_bytes > \strlen($str)) {
        return $str;
    }

    if (\substr($str, -$suffix_bytes) !== $search) {
        return $str;
    }

    return \substr($str, 0, -$suffix_bytes) . $replacement;
}
7419
|
|
|
|
7420
|
|
|
/**
 * Replace the first "$search"-term with the "$replace"-term.
 *
 * @param string $search  <p>The term to look for.</p>
 * @param string $replace <p>The replacement for the first occurrence.</p>
 * @param string $subject <p>The string that is searched.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The subject with the first occurrence replaced, or the
 *                unchanged subject if "$search" was not found.</p>
 *
 * @psalm-suppress InvalidReturnType
 */
public static function str_replace_first(
    string $search,
    string $replace,
    string $subject
): string {
    $position = self::strpos($subject, $search);
    if ($position === false) {
        return $subject;
    }

    /**
     * @psalm-suppress InvalidReturnStatement
     */
    return self::substr_replace($subject, $replace, $position, (int) self::strlen($search));
}
7454
|
|
|
|
7455
|
|
|
/**
 * Replace the last "$search"-term with the "$replace"-term.
 *
 * @param string $search  <p>The term to look for.</p>
 * @param string $replace <p>The replacement for the last occurrence.</p>
 * @param string $subject <p>The string that is searched.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The subject with the last occurrence replaced, or the
 *                unchanged subject if "$search" was not found.</p>
 *
 * @psalm-suppress InvalidReturnType
 */
public static function str_replace_last(
    string $search,
    string $replace,
    string $subject
): string {
    $position = self::strrpos($subject, $search);
    if ($position === false) {
        return $subject;
    }

    /**
     * @psalm-suppress InvalidReturnStatement
     */
    return self::substr_replace($subject, $replace, $position, (int) self::strlen($search));
}
7488
|
|
|
|
7489
|
|
|
/**
 * Shuffles all the characters in the string.
 *
 * INFO: uses "shuffle()", a random algorithm which is weak for cryptography purposes
 *
 * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
 *
 * @param string $str      <p>The input string</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 *                <p>The shuffled string.</p>
 */
public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
{
    // the plain "mb_*" functions are used for UTF-8, the class helpers for everything else
    $is_utf8 = $encoding === 'UTF-8';
    if (!$is_utf8) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $char_count = $is_utf8
        ? (int) \mb_strlen($str)
        : (int) self::strlen($str, $encoding);

    // shuffle the character positions, then rebuild the string in that order
    $positions = \range(0, $char_count - 1);
    \shuffle($positions);

    $shuffled_str = '';
    foreach ($positions as $position) {
        $char = $is_utf8
            ? \mb_substr($str, $position, 1)
            : self::substr($str, $position, 1, $encoding);

        if ($char !== false) {
            $shuffled_str .= $char;
        }
    }

    return $shuffled_str;
}
7536
|
|
|
|
7537
|
|
|
/**
 * Returns the substring beginning at $start, and up to, but not including
 * the index specified by $end. If $end is omitted, the function extracts
 * the remaining string. If $end is negative, it is computed from the end
 * of the string.
 *
 * NOTE(review): a negative $start is handed to the substring function as it
 * is, while the length is still calculated as "$end - $start" - confirm that
 * this is the intended behavior for negative start offsets.
 *
 * @param string   $str      <p>The input string.</p>
 * @param int      $start    <p>Initial index from which to begin extraction.</p>
 * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
 * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The extracted substring; an empty string if a non-negative
 *                      <i>end</i> is not greater than <i>start</i>.</p><p>If <i>str</i> is shorter
 *                      than <i>start</i> characters long, <b>FALSE</b> will be returned.
 */
public static function str_slice(
    string $str,
    int $start,
    int $end = null,
    string $encoding = 'UTF-8'
) {
    // the plain "mb_*" functions are used for UTF-8, the class helpers for everything else
    $is_utf8 = $encoding === 'UTF-8';
    if (!$is_utf8) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($end === null || $end < 0) {
        // both cases need the total number of characters
        $total = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        $length = $end === null ? $total : $total + $end - $start;
    } elseif ($end <= $start) {
        // the (non-negative) end lies at or before the start: nothing to extract
        return '';
    } else {
        $length = $end - $start;
    }

    return $is_utf8
        ? \mb_substr($str, $start, $length)
        : self::substr($str, $start, $length, $encoding);
}
7588
|
|
|
|
7589
|
|
|
/**
 * Convert a string to e.g.: "snake_case"
 *
 * Whitespace is normalized, "-" becomes "_", every upper case letter is
 * lowercased and prefixed with "_", every ASCII digit is surrounded by "_",
 * and runs of "_" / whitespace collapse into a single "_". Leading and
 * trailing "_" and whitespace are removed.
 *
 * @param string $str      <p>The input string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string in snake_case.</p>
 */
public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    $str = \str_replace(
        '-',
        '_',
        self::normalize_whitespace($str)
    );

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = (string) \preg_replace_callback(
        // Numbers and upper case letters only: inside a character class "|" is
        // a literal pipe and not an alternation, so it must not be listed here.
        '/([\\p{N}\\p{Lu}])/u',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            $match = $matches[1];
            $match_int = (int) $match;

            // plain ASCII digits survive the int round-trip: separate them on both sides
            if ((string) $match_int === $match) {
                return '_' . $match . '_';
            }

            if ($encoding === 'UTF-8') {
                return '_' . \mb_strtolower($match);
            }

            return '_' . self::strtolower($match, $encoding);
        },
        $str
    );

    $str = (string) \preg_replace(
        [
            '/\\s+/u',           // convert spaces to "_"
            '/^\\s+|\\s+$/u', // trim leading & trailing spaces
            '/_+/',                 // remove double "_"
        ],
        [
            '_',
            '',
            '_',
        ],
        $str
    );

    return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
}
7658
|
|
|
|
7659
|
|
|
/**
 * Sort all characters according to code points.
 *
 * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
 *
 * @param string $str    <p>A UTF-8 string.</p>
 * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
 * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string of sorted characters.</p>
 */
public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
{
    /** @var int[] $code_points */
    $code_points = self::codepoints($str);

    if ($unique) {
        // flipping twice drops repeated code points
        $code_points = \array_flip(\array_flip($code_points));
    }

    if (!$desc) {
        \asort($code_points);
    } else {
        \arsort($code_points);
    }

    return self::string($code_points);
}
7690
|
|
|
|
7691
|
|
|
/**
 * Convert each string of an array to an array of Unicode characters.
 *
 * EXAMPLE: <code>
 * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
 * </code>
 *
 * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
 * @param int            $length                  [optional] <p>Max character length of each array
 *                                                element.</p>
 * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
 *                                                string.</p>
 * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
 *                                                "mb_substr"</p>
 *
 * @psalm-pure
 *
 * @return string[][]
 *                    <p>An array containing chunks of the input; the keys of $input are kept.</p>
 */
public static function str_split_array(
    array $input,
    int $length = 1,
    bool $clean_utf8 = false,
    bool $try_to_use_mb_functions = true
): array {
    /** @var string[][] $chunks */
    $chunks = \array_map(
        static function ($value) use ($length, $clean_utf8, $try_to_use_mb_functions) {
            return self::str_split(
                $value,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        },
        $input
    );

    return $chunks;
}
7729
|
|
|
|
7730
|
|
|
/**
 * Convert a string to an array of unicode characters.
 *
 * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
 *
 * @param int|string $input                   <p>The string or int to split into array.</p>
 * @param int        $length                  [optional] <p>Max character length of each array
 *                                            element.</p>
 * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
 *                                            string.</p>
 * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
 *                                            "mb_substr"</p>
 *
 * @psalm-pure
 *
 * @return string[]
 *                  <p>An array containing chunks of chars from the input; an empty array
 *                  for an empty input or a $length smaller than 1.</p>
 */
public static function str_split(
    $input,
    int $length = 1,
    bool $clean_utf8 = false,
    bool $try_to_use_mb_functions = true
): array {
    if ($length <= 0) {
        return [];
    }

    // this is only an old fallback: arrays are delegated to "str_split_array()"
    /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
    /** @var int|int[]|string|string[] $input */
    $input = $input;
    if (\is_array($input)) {
        /** @psalm-suppress InvalidReturnStatement */
        /** @phpstan-ignore-next-line - old code :/ */
        return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
    }

    $input = (string) $input;
    if ($input === '') {
        return [];
    }

    if ($clean_utf8) {
        $input = self::clean($input);
    }

    // split into single characters via PCRE ("." in UTF-8 mode, including newlines)
    $split_via_pcre = static function ($value): array {
        $matches = [];
        \preg_match_all('/./us', $value, $matches);

        return $matches[0] ?? [];
    };

    if (
        $try_to_use_mb_functions
        &&
        self::$SUPPORT['mbstring'] === true
    ) {
        // the native function already creates chunks of "$length" characters
        if (\function_exists('mb_str_split')) {
            /**
             * @psalm-suppress ImpureFunctionCall - why?
             */
            $chunks = \mb_str_split($input, $length);
            if ($chunks !== false) {
                return $chunks;
            }
        }

        $char_count = \mb_strlen($input);
        if ($char_count <= 127) {
            $chars = [];
            for ($offset = 0; $offset < $char_count; ++$offset) {
                $chars[] = \mb_substr($input, $offset, 1);
            }
        } else {
            $chars = $split_via_pcre($input);
        }
    } elseif (self::$SUPPORT['pcre_utf8'] === true) {
        $chars = $split_via_pcre($input);
    } else {
        // fallback: walk over the bytes and group them by the lead byte pattern;
        // bytes which do not form a complete sequence are skipped

        $chars = [];
        $byte_count = \strlen($input);
        $pos = 0;

        while ($pos < $byte_count) {
            $lead = $input[$pos];

            if (($lead & "\x80") === "\x00") {
                // 0xxxxxxx: single byte (ASCII)
                $chars[] = $lead;
            } elseif (
                isset($input[$pos + 1])
                &&
                ($lead & "\xE0") === "\xC0"
            ) {
                // 110xxxxx: lead byte of a two byte sequence
                if (($input[$pos + 1] & "\xC0") === "\x80") {
                    $chars[] = \substr($input, $pos, 2);

                    ++$pos;
                }
            } elseif (
                isset($input[$pos + 2])
                &&
                ($lead & "\xF0") === "\xE0"
            ) {
                // 1110xxxx: lead byte of a three byte sequence
                if (
                    ($input[$pos + 1] & "\xC0") === "\x80"
                    &&
                    ($input[$pos + 2] & "\xC0") === "\x80"
                ) {
                    $chars[] = \substr($input, $pos, 3);

                    $pos += 2;
                }
            } elseif (
                isset($input[$pos + 3])
                &&
                ($lead & "\xF8") === "\xF0"
            ) {
                // 11110xxx: lead byte of a four byte sequence
                if (
                    ($input[$pos + 1] & "\xC0") === "\x80"
                    &&
                    ($input[$pos + 2] & "\xC0") === "\x80"
                    &&
                    ($input[$pos + 3] & "\xC0") === "\x80"
                ) {
                    $chars[] = \substr($input, $pos, 4);

                    $pos += 3;
                }
            }

            ++$pos;
        }
    }

    // the fallbacks above produce single characters: glue them into chunks
    if ($length > 1) {
        return \array_map(
            static function (array $group): string {
                return \implode('', $group);
            },
            \array_chunk($chars, $length)
        );
    }

    if (isset($chars[0]) && $chars[0] === '') {
        return [];
    }

    return $chars;
}
7883
|
|
|
|
7884
|
|
|
/**
 * Splits the string with the provided pattern, returning an
 * array of strings. An optional integer $limit will truncate the
 * results.
 *
 * NOTE(review): with "mbstring" support the pattern is handed to "mb_split()"
 * as a regular expression, while the fallback escapes it via "preg_quote()"
 * and therefore splits on the literal text - confirm which one is intended.
 *
 * @param string $str     <p>The input string.</p>
 * @param string $pattern <p>The regex with which to split the string.</p>
 * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
 *
 * @psalm-pure
 *
 * @return string[]
 *                  <p>An array of strings; an empty array if $limit is 0 or if the split
 *                  fails, and the unsplit string as the only element if $pattern is empty.</p>
 */
public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
{
    if ($limit === 0) {
        return [];
    }

    if ($pattern === '') {
        return [$str];
    }

    if (self::$SUPPORT['mbstring'] === true) {
        $parts = \mb_split($pattern, $str);
        if ($parts === false) {
            return [];
        }

        // split everything, then keep only the first "$limit" parts
        return $limit > 0 ? \array_slice($parts, 0, $limit) : $parts;
    }

    // Ask for one part more than requested: the extra part collects the
    // unsplit rest of the string and is dropped again below.
    $split_limit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $split_limit);
    if ($parts === false) {
        return [];
    }

    if ($split_limit > 0 && \count($parts) === $split_limit) {
        \array_pop($parts);
    }

    return $parts;
}
7953
|
|
|
|
7954
|
|
|
/**
 * Check if the string starts with the given substring.
 *
 * EXAMPLE: <code>
 * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
 * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Always true for an empty needle, otherwise false for an empty haystack.</p>
 */
public static function str_starts_with(string $haystack, string $needle): bool
{
    // an empty needle always matches, an empty haystack only matches an empty needle
    if ($needle === '' || $haystack === '') {
        return $needle === '';
    }

    if (\PHP_VERSION_ID < 80000) {
        // byte-wise comparison of the first "strlen($needle)" bytes
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }

    /** @phpstan-ignore-next-line - only for PHP8 */
    return \str_starts_with($haystack, $needle);
}
7986
|
|
|
|
7987
|
|
|
/**
 * Returns true if the string begins with any of $substrings, false otherwise.
 *
 * - case-sensitive
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $substrings <p>Substrings to look for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str starts with one of the $substrings;
 *              always false for an empty $str or an empty list.</p>
 */
public static function str_starts_with_any(string $str, array $substrings): bool
{
    if ($str === '' || $substrings === []) {
        return false;
    }

    // iterate by value: the list is only read, so no reference is needed
    // (a by-reference loop would leave a dangling reference behind)
    foreach ($substrings as $substring) {
        if (self::str_starts_with($str, $substring)) {
            return true;
        }
    }

    return false;
}
8018
|
|
|
|
8019
|
|
|
/**
 * Gets the substring after the first occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Everything behind the first $separator, or an empty string if
 *                $str / $separator is empty or the separator was not found.</p>
 */
public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($separator === '' || $str === '') {
        return '';
    }

    // fast path: plain mbstring calls for the default encoding
    if ($encoding === 'UTF-8') {
        $offset = \mb_strpos($str, $separator);
        if ($offset === false) {
            return '';
        }

        return (string) \mb_substr(
            $str,
            $offset + (int) \mb_strlen($separator)
        );
    }

    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    // use the class wrapper (like the sibling "str_substr_*_separator" methods)
    // instead of calling \mb_substr() directly with the raw $encoding
    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
8060
|
|
|
|
8061
|
|
|
/**
 * Gets the substring after the last occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Everything behind the last $separator, or an empty string if
 *                $str / $separator is empty or the separator was not found.</p>
 */
public static function str_substr_after_last_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $separator === '') {
        return '';
    }

    $is_utf8 = $encoding === 'UTF-8';

    // locate the last separator: raw mbstring for UTF-8, class wrapper otherwise
    $last_pos = $is_utf8
        ? \mb_strrpos($str, $separator)
        : self::strrpos($str, $separator, 0, $encoding);

    if ($last_pos === false) {
        return '';
    }

    if ($is_utf8) {
        $start = $last_pos + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $start);
    }

    $start = $last_pos + (int) self::strlen($separator, $encoding);

    return (string) self::substr($str, $start, null, $encoding);
}
8105
|
|
|
|
8106
|
|
|
/**
 * Gets the substring before the first occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Everything in front of the first $separator, or an empty string if
 *                $str / $separator is empty or the separator was not found.</p>
 */
public static function str_substr_before_first_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $separator === '') {
        return '';
    }

    $is_utf8 = $encoding === 'UTF-8';

    // locate the first separator: raw mbstring for UTF-8, class wrapper otherwise
    $first_pos = $is_utf8
        ? \mb_strpos($str, $separator)
        : self::strpos($str, $separator, 0, $encoding);

    if ($first_pos === false) {
        return '';
    }

    // the position of the separator is also the length of the part in front of it
    if ($is_utf8) {
        return (string) \mb_substr($str, 0, $first_pos);
    }

    return (string) self::substr($str, 0, $first_pos, $encoding);
}
8151
|
|
|
|
8152
|
|
|
/**
 * Gets the substring before the last occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Everything in front of the last $separator, or an empty string if
 *                $str / $separator is empty or the separator was not found.</p>
 */
public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $separator === '') {
        return '';
    }

    if ($encoding === 'UTF-8') {
        // fast path: plain mbstring calls for the default encoding
        $last_pos = \mb_strrpos($str, $separator);

        return $last_pos === false
            ? ''
            : (string) \mb_substr($str, 0, $last_pos);
    }

    $last_pos = self::strrpos($str, $separator, 0, $encoding);
    if ($last_pos === false) {
        return '';
    }

    // the encoding name is normalized only after the lookup, right before the cut
    $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

    return (string) self::substr($str, 0, $last_pos, $normalized_encoding);
}
8196
|
|
|
|
8197
|
|
|
/**
 * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
 *
 * INFO: without "$before_needle" the result starts with the "$needle" itself.
 *
 * @param string $str           <p>The input string.</p>
 * @param string $needle        <p>The string to look for.</p>
 * @param bool   $before_needle [optional] <p>Default: false</p>
 * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The matching part, or an empty string if $str / $needle is empty
 *                or the needle was not found.</p>
 */
public static function str_substr_first(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($needle === '' || $str === '') {
        return '';
    }

    // mb_strstr()'s third argument defaults to false, so handing the flag
    // through covers both the "before" and the "after" case
    $found = $encoding === 'UTF-8'
        ? \mb_strstr($str, $needle, $before_needle)
        : self::strstr($str, $needle, $before_needle, $encoding);

    if ($found === false) {
        return '';
    }

    return $found;
}
8243
|
|
|
|
8244
|
|
|
/**
 * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
 *
 * INFO: without "$before_needle" the result starts with the "$needle" itself.
 *
 * @param string $str           <p>The input string.</p>
 * @param string $needle        <p>The string to look for.</p>
 * @param bool   $before_needle [optional] <p>Default: false</p>
 * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The matching part, or an empty string if $str / $needle is empty
 *                or the needle was not found.</p>
 */
public static function str_substr_last(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($needle === '' || $str === '') {
        return '';
    }

    // mb_strrchr()'s third argument defaults to false, so handing the flag
    // through covers both the "before" and the "after" case
    $found = $encoding === 'UTF-8'
        ? \mb_strrchr($str, $needle, $before_needle)
        : self::strrchr($str, $needle, $before_needle, $encoding);

    if ($found === false) {
        return '';
    }

    return $found;
}
8290
|
|
|
|
8291
|
|
|
/**
 * Surrounds $str with the given substring.
 *
 * EXAMPLE: <code>UTF8::str_surround('foo', '*'); // '*foo*'</code>
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The substring to add to both sides.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with the substring both prepended and appended.</p>
 */
public static function str_surround(string $str, string $substring): string
{
    return \implode('', [$substring, $str, $substring]);
}
8306
|
|
|
|
8307
|
|
|
/**
 * Returns a trimmed string with the first letter of each word capitalized.
 * Also accepts an array, $ignore, allowing you to list words not to be
 * capitalized.
 *
 * @param string              $str
 * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
 *                                                           null. Default: null</p>
 * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
 * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
 *                                                           string.</p>
 * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
 *                                                           el, lt, tr</p>
 * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
 *                                                           e.g. ẞ -> ß</p>
 * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
 *                                                           first</p>
 * @param string|null         $word_define_chars             [optional] <p>A string of chars that are treated as
 *                                                           word separators, in addition to whitespace.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The titleized string.</p>
 */
public static function str_titleize(
    string $str,
    array $ignore = null,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false,
    bool $use_trim_first = true,
    string $word_define_chars = null
): string {
    if ($str === '') {
        return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($use_trim_first) {
        $str = \trim($str);
    }

    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // the plain mbstring functions are only used if no language specific handling is requested
    $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

    // compare strictly: a truthiness check would silently drop the separator "0"
    if ($word_define_chars !== null && $word_define_chars !== '') {
        $word_define_chars = \preg_quote($word_define_chars, '/');
    } else {
        $word_define_chars = '';
    }

    // a "word" is every run of chars that is neither whitespace nor one of the extra separators
    $str = (string) \preg_replace_callback(
        '/([^\\s' . $word_define_chars . ']+)/u',
        static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
            // ignored words are returned untouched
            if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                return $match[0];
            }

            if ($use_mb_functions) {
                if ($encoding === 'UTF-8') {
                    return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                           . \mb_strtolower(\mb_substr($match[0], 1));
                }

                return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                       . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
            }

            return self::ucfirst(
                self::strtolower(
                    $match[0],
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                ),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        },
        $str
    );

    return $str;
}
8402
|
|
|
|
8403
|
|
|
/**
 * Convert a string into an obfuscated string.
 *
 * EXAMPLE: <code>
 *
 * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
 * </code>
 *
 * INFO: the positions are chosen randomly, so the result differs between calls.
 *
 * @param string   $str
 * @param float    $percent       <p>Fraction of the chars to process (0.0 - 1.0); bigger values are
 *                                handled like 1.0.</p>
 * @param string   $obfuscateChar <p>The replacement for an obfuscated char.</p>
 * @param string[] $keepChars     <p>Chars that are never replaced (they still count as processed).</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The obfuscated string.</p>
 */
public static function str_obfuscate(
    string $str,
    float $percent = 0.5,
    string $obfuscateChar = '*',
    array $keepChars = []
): string {
    // protect already existing obfuscate-chars in the input, they are restored at the end
    $obfuscateCharHelper = "\u{2603}";
    $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

    $chars = self::chars($str);
    $charsMax = \count($chars);
    // never ask for more chars than available, otherwise the loop below could not terminate
    $charsMaxChange = \min($charsMax, \round($charsMax * $percent));
    $charsCounter = 0;
    $charKeyDone = [];

    // keep passing over the chars until enough of them were picked
    while ($charsCounter < $charsMaxChange) {
        foreach ($chars as $charKey => $char) {
            if (isset($charKeyDone[$charKey])) {
                continue;
            }

            // pick roughly every second remaining char per pass
            if (\random_int(0, 100) > 50) {
                continue;
            }

            if ($char === $obfuscateChar) {
                continue;
            }

            ++$charsCounter;
            $charKeyDone[$charKey] = true;

            if ($charsCounter > $charsMaxChange) {
                break;
            }

            // protected chars count as processed but stay readable
            if (\in_array($char, $keepChars, true)) {
                continue;
            }

            $chars[$charKey] = $obfuscateChar;
        }
    }

    $str = \implode('', $chars);

    return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
}
8469
|
|
|
|
8470
|
|
|
/**
 * Returns a trimmed string in proper title case.
 *
 * Also accepts an array, $ignore, allowing you to list words not to be
 * capitalized.
 *
 * Adapted from John Gruber's script.
 *
 * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
 *
 * @param string $str
 * @param array  $ignore   <p>An array of words not to capitalize. INFO: the entries are inserted
 *                         into the regular expressions as they are (no escaping).</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The titleized string.</p>
 */
public static function str_titleize_for_humans(
    string $str,
    array $ignore = [],
    string $encoding = 'UTF-8'
): string {
    if ($str === '') {
        return '';
    }

    // regex fragments for the words which stay lower-case inside of a title
    $small_words = [
        '(?<!q&)a',
        'an',
        'and',
        'as',
        'at(?!&t)',
        'but',
        'by',
        'en',
        'for',
        'if',
        'in',
        'of',
        'on',
        'or',
        'the',
        'to',
        'v[.]?',
        'via',
        'vs[.]?',
    ];

    if ($ignore !== []) {
        $small_words = \array_merge($small_words, $ignore);
    }

    $small_words_rx = \implode('|', $small_words);
    $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

    $str = \trim($str);

    // an input without any lower-case char is lower-cased first
    if (!self::has_lowercase($str)) {
        $str = self::strtolower($str, $encoding);
    }

    // the main substitutions
    $str = (string) \preg_replace_callback(
        '~\\b (_*) (?:                                                           # 1. Leading underscore and
                    ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or
                      [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                    |
                    ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                    |
                    ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                    |
                    ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                  ) (_*) \\b                                                          # 6. With trailing underscore
                ~ux',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            // preserve leading underscore
            $result = $matches[1];
            if ($matches[2]) {
                // preserve URLs, domains, emails and file paths
                $result .= $matches[2];
            } elseif ($matches[3]) {
                // lower-case small words
                $result .= self::strtolower($matches[3], $encoding);
            } elseif ($matches[4]) {
                // capitalize word w/o internal caps
                $result .= self::ucfirst($matches[4], $encoding);
            } else {
                // preserve other kinds of word (iPhone)
                $result .= $matches[5];
            }
            // preserve trailing underscore
            $result .= $matches[6];

            return $result;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string) \preg_replace_callback(
        '~(  \\A [[:punct:]]*            # start of title...
                  |  [:.;?!][ ]+                # or of subsentence...
                  |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                  ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                 ~uxi',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            return $matches[1] . self::ucfirst($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string) \preg_replace_callback(
        '~\\b ( ' . $small_words_rx . ' ) # small word...
                  (?= [[:punct:]]* \Z          # ...at the end of the title...
                  |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                 ~uxi',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            return self::ucfirst($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words.
    // e.g. "in-flight" -> In-Flight
    $str = (string) \preg_replace_callback(
        '~\\b
                    (?<! -)                                       # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                    ( ' . $small_words_rx . ' )
                    (?= -[[:alpha:]]+)                            # lookahead for "-someword"
                   ~uxi',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            return self::ucfirst($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    $str = (string) \preg_replace_callback(
        '~\\b
                  (?<!…)                                       # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                  ( [[:alpha:]]+- )                            # $1 = first word and hyphen, should already be properly capped
                  ( ' . $small_words_rx . ' ) # ...followed by small word
                  (?! - )                                      # Negative lookahead for another -
                 ~uxi',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            return $matches[1] . self::ucfirst($matches[2], $encoding);
        },
        $str
    );

    return $str;
}
8660
|
|
|
|
8661
|
|
|
/**
 * Get a binary representation of a specific string.
 *
 * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
 *
 * INFO: leading zero bits are not part of the result (e.g. 'a' => '1100001'),
 *       an input without any set bit results in '0'.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>false on error</p>
 */
public static function str_to_binary(string $str)
{
    if ($str === '') {
        return '0';
    }

    /** @var array<int, int>|false $bytes - needed for PhpStan (stubs error) */
    $bytes = \unpack('C*', $str);
    if ($bytes === false) {
        return false;
    }

    // convert byte by byte: base_convert() works with floats internally and
    // loses precision as soon as the input is longer than a few bytes
    $bits = '';
    foreach ($bytes as $byte) {
        $bits .= \str_pad(\decbin($byte), 8, '0', \STR_PAD_LEFT);
    }

    // same output format as before: a number without leading zeros
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
}
8684
|
|
|
|
8685
|
|
|
/**
 * Split a string into an array of lines (line breaks: "\r" and / or "\n").
 *
 * NOTE(review): the pattern consumes up to two consecutive line-break chars as one
 * separator, so e.g. "a\n\nb" does not produce an empty entry - confirm that this is intended.
 *
 * @param string   $str
 * @param bool     $remove_empty_values <p>Remove empty values.</p>
 * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
 *
 * @psalm-pure
 *
 * @return string[]
 */
public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
{
    if ($str === '') {
        return $remove_empty_values ? [] : [''];
    }

    // prefer mbstring, fall back to PCRE if the extension is not available
    $lines = self::$SUPPORT['mbstring'] === true
        ? \mb_split("[\r\n]{1,2}", $str)
        : \preg_split("/[\r\n]{1,2}/u", $str);

    if ($lines === false) {
        return $remove_empty_values ? [] : [''];
    }

    // without any active filter the raw split result is the answer
    $needs_filtering = $remove_empty_values || $remove_short_values !== null;
    if (!$needs_filtering) {
        return $lines;
    }

    return self::reduce_string_array(
        $lines,
        $remove_empty_values,
        $remove_short_values
    );
}
8724
|
|
|
|
8725
|
|
|
/**
 * Convert a string into an array of words.
 *
 * INFO: the delimiters between the words are part of the result, too.
 *
 * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
 *
 * @param string   $str
 * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
 * @param bool     $remove_empty_values <p>Remove empty values.</p>
 * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
 *
 * @psalm-pure
 *
 * @return string[]
 */
public static function str_to_words(
    string $str,
    string $char_list = '',
    bool $remove_empty_values = false,
    int $remove_short_values = null
): array {
    if ($str === '') {
        return $remove_empty_values ? [] : [''];
    }

    // regex char-class for "word" chars: letters plus the additional chars
    $char_list = self::rxClass($char_list, '\pL');

    // words may be joined by a dash or an apostrophe, the capture group keeps them in the result
    $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
        return $remove_empty_values ? [] : [''];
    }

    // without any active filter the raw split result is the answer
    $needs_filtering = $remove_empty_values || $remove_short_values !== null;
    if (!$needs_filtering) {
        return $parts;
    }

    $reduced = self::reduce_string_array(
        $parts,
        $remove_empty_values,
        $remove_short_values
    );

    // make sure that every entry is a string (keys are preserved)
    return \array_map(
        static function ($item): string {
            return (string) $item;
        },
        $reduced
    );
}
8776
|
|
|
|
8777
|
|
|
/**
 * Truncates the string to a given length. If $substring is provided, and
 * truncating occurs, the string is further truncated so that the substring
 * may be appended without exceeding the desired length.
 *
 * INFO: if the $substring alone is already longer than $length, only the $substring is returned.
 *
 * @param string $str
 * @param int    $length    <p>Desired length of the truncated string.</p>
 * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string after truncating.</p>
 */
public static function str_truncate(
    string $str,
    int $length,
    string $substring = '',
    string $encoding = 'UTF-8'
): string {
    if ($str === '') {
        return '';
    }

    if ($encoding === 'UTF-8') {
        // short enough: nothing to truncate
        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        if ($substring !== '') {
            // reserve room for the substring, but never go below zero: a negative
            // length would make mb_substr() count from the end of the string
            $length = \max(0, $length - (int) \mb_strlen($substring));

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        return (string) \mb_substr($str, 0, $length);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    // short enough: nothing to truncate
    if ($length >= (int) self::strlen($str, $encoding)) {
        return $str;
    }

    if ($substring !== '') {
        // same clamping as in the UTF-8 branch
        $length = \max(0, $length - (int) self::strlen($substring, $encoding));
    }

    return (
        (string) self::substr(
            $str,
            0,
            $length,
            $encoding
        )
    ) . $substring;
}
8836
|
|
|
|
8837
|
|
|
/**
 * Truncates the string to a given length, while ensuring that it does not
 * split words. If $substring is provided, and truncating occurs, the
 * string is further truncated so that the substring may be appended without
 * exceeding the desired length.
 *
 * @param string $str
 * @param int    $length                                 <p>Desired length of the truncated string.</p>
 * @param string $substring                              [optional] <p>The substring to append if it can fit.
 *                                                       Default:
 *                                                       ''</p>
 * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
 * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string after truncating.</p>
 */
public static function str_truncate_safe(
    string $str,
    int $length,
    string $substring = '',
    string $encoding = 'UTF-8',
    bool $ignore_do_not_split_words_for_one_word = false
): string {
    if ($length <= 0 || $str === '') {
        return $substring;
    }

    if ($encoding === 'UTF-8') {
        // short enough: nothing to truncate
        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        // reserve room for the substring which is appended at the end
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $head - needed for PhpStan (stubs error) */
        $head = \mb_substr($str, 0, $length);
        if ($head === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $next_space = \mb_strpos($str, ' ', $length - 1);
        if ($next_space !== $length) {
            // otherwise go back to the last space inside of the truncated part
            $last_space = \mb_strrpos($head, ' ', 0);

            $cut_at_last_space = $last_space !== false
                                 || ($next_space !== false && !$ignore_do_not_split_words_for_one_word);

            if ($cut_at_last_space) {
                $head = (string) \mb_substr($head, 0, (int) $last_space);
            }
        }

        return $head . $substring;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    // short enough: nothing to truncate
    if ($length >= (int) self::strlen($str, $encoding)) {
        return $str;
    }

    // reserve room for the substring which is appended at the end
    $length -= (int) self::strlen($substring, $encoding);
    if ($length <= 0) {
        return $substring;
    }

    $head = self::substr($str, 0, $length, $encoding);
    if ($head === false) {
        return '';
    }

    // a space directly behind the cut means that no word was split
    $next_space = self::strpos($str, ' ', $length - 1, $encoding);
    if ($next_space !== $length) {
        // otherwise go back to the last space inside of the truncated part
        $last_space = self::strrpos($head, ' ', 0, $encoding);

        $cut_at_last_space = $last_space !== false
                             || ($next_space !== false && !$ignore_do_not_split_words_for_one_word);

        if ($cut_at_last_space) {
            $head = (string) self::substr($head, 0, (int) $last_space, $encoding);
        }
    }

    return $head . $substring;
}
8943
|
|
|
|
8944
|
|
|
/**
 * Returns a lowercase and trimmed string separated by underscores.
 *
 * Underscores are inserted before uppercase characters (with the exception
 * of the first character of the string), and in place of spaces as well as
 * dashes. The work is delegated to UTF8::str_delimit() with "_" as delimiter.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The underscored string.</p>
 */
public static function str_underscored(string $str): string
{
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
}
8961
|
|
|
|
8962
|
|
|
/**
 * Returns an UpperCamelCase version of the supplied string. It trims
 * surrounding spaces, capitalizes letters following digits, spaces, dashes
 * and underscores, and removes spaces, dashes, underscores.
 *
 * INFO: the string is first passed through UTF8::str_camelize() (only $str and
 * $encoding are forwarded to that step), then through UTF8::ucfirst() with all
 * remaining options.
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string in UpperCamelCase.</p>
 */
public static function str_upper_camelize(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    $camelized = self::str_camelize($str, $encoding);

    return self::ucfirst(
        $camelized,
        $encoding,
        $clean_utf8,
        $lang,
        $try_to_keep_the_string_length
    );
}
8989
|
|
|
|
8990
|
|
|
/**
 * Get the number of words in a specific string, or the words themselves.
 *
 * The string is split via UTF8::str_to_words(); the entries at odd indexes of
 * that result are treated as the words, the entries at even indexes as the
 * text between them.
 *
 * EXAMPLES: <code>
 * // format: 0 -> return only word count (int)
 * //
 * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
 * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
 *
 * // format: 1 -> return words (array)
 * //
 * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
 * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
 *
 * // format: 2 -> return words with offset (array)
 * //
 * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
 * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
 * </code>
 *
 * @param string $str       <p>The input string.</p>
 * @param int    $format    [optional] <p>
 *                          <strong>0</strong> => return a number of words (default)<br>
 *                          <strong>1</strong> => return an array of words<br>
 *                          <strong>2</strong> => return an array of words with word-offset as key
 *                          </p>
 * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
 *
 * @psalm-pure
 *
 * @return int|string[]
 *                      <p>The number of words (format 0), the list of words (format 1),
 *                      or the words keyed by their character offset (format 2).</p>
 */
public static function str_word_count(string $str, int $format = 0, string $char_list = '')
{
    $parts = self::str_to_words($str, $char_list);
    $part_count = \count($parts);

    if ($format === 1) {
        $words = [];
        for ($idx = 1; $idx < $part_count; $idx += 2) {
            $words[] = $parts[$idx];
        }

        return $words;
    }

    if ($format === 2) {
        $words_by_offset = [];

        // the first word starts after whatever precedes it
        $position = (int) self::strlen($parts[0]);
        for ($idx = 1; $idx < $part_count; $idx += 2) {
            $words_by_offset[$position] = $parts[$idx];

            // skip the word itself plus the text that follows it
            $position += (int) self::strlen($parts[$idx]) + (int) self::strlen($parts[$idx + 1]);
        }

        return $words_by_offset;
    }

    // any other format: plain count of the odd-indexed entries
    return (int) (($part_count - 1) / 2);
}
9047
|
|
|
|
9048
|
|
|
/**
 * Case-insensitive string comparison.
 *
 * INFO: Case-insensitive version of UTF8::strcmp(); both strings are run
 * through UTF8::strtocasefold() and the results are handed to UTF8::strcmp().
 *
 * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
 *             <strong>0</strong> if they are equal
 */
public static function strcasecmp(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): int {
    $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded_first, $folded_second);
}
9090
|
|
|
|
9091
|
|
|
/**
 * Case-sensitive string comparison.
 *
 * Identical strings compare as equal right away. Otherwise both strings are
 * normalized to NFD and compared byte-wise with \strcmp(), so canonically
 * equivalent spellings (precomposed vs. combining marks) compare as equal.
 *
 * If a string cannot be normalized (\Normalizer::normalize() does not return
 * a string, e.g. for invalid UTF-8), its raw bytes are compared instead of
 * passing the failure value on to \strcmp().
 *
 * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
 *
 * @param string $str1 <p>The first string.</p>
 * @param string $str2 <p>The second string.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
 *             <strong>0</strong> if they are equal
 */
public static function strcmp(string $str1, string $str2): int
{
    if ($str1 === $str2) {
        return 0;
    }

    // Under strict_types a non-string result from normalize() would make
    // \strcmp() throw a TypeError, so fall back to the unnormalized input.
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    if (!\is_string($normalized1)) {
        $normalized1 = $str1;
    }

    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);
    if (!\is_string($normalized2)) {
        $normalized2 = $str2;
    }

    return \strcmp($normalized1, $normalized2);
}
9119
|
|
|
|
9120
|
|
|
/**
 * Find length of initial segment not matching mask.
 *
 * Counts the characters at the start of $str (or of the slice selected by
 * $offset / $length) that come before the first character matched by the
 * character class built from $char_list.
 *
 * INFO: an empty $char_list returns the length of the whole $str; that check
 * happens before $offset / $length are applied.
 *
 * @param string   $str       <p>The string to examine.</p>
 * @param string   $char_list <p>The characters to stop at.</p>
 * @param int      $offset    [optional] <p>Start of the slice to examine.</p>
 * @param int|null $length    [optional] <p>Length of the slice to examine.</p>
 * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 */
public static function strcspn(
    string $str,
    string $char_list,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8'
): int {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($char_list === '') {
        return (int) self::strlen($str, $encoding);
    }

    // Narrow the subject down to the requested slice first.
    if ($offset !== 0 || $length !== null) {
        if ($encoding !== 'UTF-8') {
            $slice = self::substr($str, $offset, $length, $encoding);
        } elseif ($length !== null) {
            $slice = \mb_substr($str, $offset, $length);
        } else {
            $slice = \mb_substr($str, $offset);
        }

        if ($slice === false) {
            return 0;
        }

        $str = $slice;
    }

    if ($str === '') {
        return 0;
    }

    // Lazily capture everything in front of the first masked character.
    $found = [];
    if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $found)) {
        // no masked character at all: the whole (sliced) string counts
        return (int) self::strlen($str, $encoding);
    }

    $prefix_length = self::strlen($found[1], $encoding);

    return $prefix_length === false ? 0 : $prefix_length;
}
9182
|
|
|
|
9183
|
|
|
/**
 * Create a UTF-8 string from code points.
 *
 * INFO: opposite to UTF8::codepoints()
 *
 * Every given value is cast to int, written as a numeric HTML entity and the
 * resulting entity string is decoded to UTF-8 via mb_convert_encoding().
 *
 * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
 *
 * @param int|int[]|string|string[] $intOrHex <p>A single code point or a list of code points.</p>
 *
 * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A UTF-8 encoded string.</p>
 */
public static function string($intOrHex): string
{
    if ($intOrHex === []) {
        return '';
    }

    // a single value is handled like a one-element list
    $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

    $entities = '';
    foreach ($code_points as $code_point) {
        $entities .= '&#' . (int) $code_point . ';';
    }

    // We cannot use html_entity_decode() here, as it will not return
    // characters for many values < 160.
    return mb_convert_encoding($entities, 'UTF-8', 'HTML-ENTITIES');
}
9218
|
|
|
|
9219
|
|
|
/**
 * Checks if string starts with "BOM" (Byte Order Mark Character) character.
 *
 * Every entry of self::$BOM (BOM byte sequence => byte length) is compared
 * against the beginning of the string; the first match wins.
 *
 * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>
 *              <strong>true</strong> if the string has BOM at the start,<br>
 *              <strong>false</strong> otherwise
 *              </p>
 */
public static function string_has_bom(string $str): bool
{
    // Iterate by value: nothing is written back, and a by-reference loop would
    // leave a dangling reference into the shared static BOM table.
    foreach (self::$BOM as $bom_string => $bom_byte_length) {
        if (\strncmp($str, (string) $bom_string, $bom_byte_length) === 0) {
            return true;
        }
    }

    return false;
}
9244
|
|
|
|
9245
|
|
|
/**
 * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
 *
 * INFO: non UTF-8 chars are only removed when $clean_utf8 is true.
 *
 * EXAMPLE: <code>UTF8::strip_tags('<span>κόσμε</span>'); // 'κόσμε'</code>
 *
 * @see http://php.net/manual/en/function.strip-tags.php
 *
 * @param string      $str            <p>
 *                                    The input string.
 *                                    </p>
 * @param string|null $allowable_tags [optional] <p>
 *                                    You can use the optional second parameter to specify tags which should
 *                                    not be stripped.
 *                                    </p>
 *                                    <p>
 *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
 *                                    can not be changed with allowable_tags.
 *                                    </p>
 * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The stripped string.</p>
 */
public static function strip_tags(
    string $str,
    string $allowable_tags = null,
    bool $clean_utf8 = false
): string {
    if ($str === '') {
        return '';
    }

    $input = $clean_utf8 ? self::clean($str) : $str;

    // the native function is called without a second argument when no tags are allowed
    return $allowable_tags === null
        ? \strip_tags($input)
        : \strip_tags($input, $allowable_tags);
}
9289
|
|
|
|
9290
|
|
|
/**
 * Strip all whitespace characters. This includes tabs and newline
 * characters, as well as multibyte whitespace such as the thin space
 * and ideographic space.
 *
 * Every run matched by the "[[:space:]]" class (in unicode mode) is removed.
 *
 * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string without whitespace.</p>
 */
public static function strip_whitespace(string $str): string
{
    return $str === ''
        ? ''
        : (string) \preg_replace('/[[:space:]]+/u', '', $str);
}
9311
|
|
|
|
9312
|
|
|
/**
 * Find the position of the first occurrence of a substring in a string, case-insensitive.
 *
 * INFO: use UTF8::stripos_in_byte() for the byte-length
 *
 * Strategy: mb_stripos() when mbstring is available; otherwise
 * grapheme_stripos() (UTF-8, non-negative offset, intl available), then the
 * native stripos() for pure ASCII input, and finally a comparison of the
 * case-folded strings via UTF8::strpos().
 *
 * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
 *
 * @see http://php.net/manual/en/function.mb-stripos.php
 *
 * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
 * @param string $needle     <p>The string to find in haystack.</p>
 * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
 *                   haystack string,<br> or <strong>false</strong> if needle is not found
 */
public static function stripos(
    string $haystack,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    $php8_or_newer = \PHP_VERSION_ID >= 80000;

    // Empty haystack: only PHP >= 8 reports an empty needle as found (at position 0).
    if ($haystack === '') {
        return ($php8_or_newer && $needle === '') ? 0 : false;
    }

    // Before PHP 8 an empty needle is never found.
    if (!$php8_or_newer && $needle === '') {
        return false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $haystack = self::clean($haystack);
        $needle = self::clean($needle);
    }

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stripos($haystack, $needle, $offset, $encoding);
        }

        return \mb_stripos($haystack, $needle, $offset);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    // INFO: "grapheme_stripos()" can't handle other encodings or a negative offset
    $grapheme_usable = $encoding === 'UTF-8'
        && $offset >= 0
        && self::$SUPPORT['intl'] === true;

    if ($grapheme_usable) {
        $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
        if ($grapheme_position !== false) {
            return $grapheme_position;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \stripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
}
9401
|
|
|
|
9402
|
|
|
/**
 * Returns all of haystack starting from and including the first occurrence of needle to the end.
 *
 * The search is case-insensitive. Strategy: mb_stristr() when mbstring is
 * available; otherwise grapheme_stristr() (UTF-8 with intl), then the native
 * stristr() for pure ASCII input, and finally a case-insensitive regex match.
 *
 * EXAMPLE: <code>
 * $str = 'iñtërnâtiônàlizætiøn';
 * $search = 'NÂT';
 *
 * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
 * UTF8::stristr($str, $search, true); // 'iñtër'
 * </code>
 *
 * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
 * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
 * @param bool   $before_needle [optional] <p>
 *                              If <b>TRUE</b>, it returns the part of the
 *                              haystack before the first occurrence of the needle (excluding the needle).
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
 */
public static function stristr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    $php8_or_newer = \PHP_VERSION_ID >= 80000;

    // Empty haystack: only PHP >= 8 treats an empty needle as a match.
    if ($haystack === '') {
        return ($php8_or_newer && $needle === '') ? '' : false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // Empty needle (possibly only after cleaning): whole haystack on PHP >= 8.
    if ($needle === '') {
        return $php8_or_newer ? $haystack : false;
    }

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        return \mb_stristr($haystack, $needle, $before_needle);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');
    $is_utf8 = $encoding === 'UTF-8';

    if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // INFO: "grapheme_stristr()" can't handle other encodings
    if ($is_utf8 && self::$SUPPORT['intl'] === true) {
        $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
        if ($grapheme_result !== false) {
            return $grapheme_result;
        }
    }

    if (ASCII::is_ascii($needle . $haystack)) {
        return \stristr($haystack, $needle, $before_needle);
    }

    // Last resort: lazily capture everything in front of the first match.
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
        return false;
    }

    $prefix = $match[1];
    if ($before_needle) {
        return $prefix;
    }

    // everything from the start of the match up to the end of the haystack
    $match_start = (int) self::strlen($prefix, $encoding);

    return self::substr($haystack, $match_start, null, $encoding);
}
9507
|
|
|
|
9508
|
|
|
/**
 * Get the string length, not the byte-length!
 *
 * INFO: use UTF8::strwidth() for the char-length
 *
 * Strategy: mb_strlen() when mbstring is available; otherwise the byte length
 * for "CP850" / "ASCII", then iconv_strlen(), grapheme_strlen() (UTF-8 with
 * intl), the byte length for pure ASCII input, and finally a regex that
 * counts the characters.
 *
 * EXAMPLE: <code>UTF8::strlen('Iñtërnâtiônàlizætiøn'); // 20</code>
 *
 * @see http://php.net/manual/en/function.mb-strlen.php
 *
 * @param string $str        <p>The string being checked for length.</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>
 *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
 *                   $encoding.
 *                   (One multi-byte character counted as +1).
 *                   <br>
 *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
 *                   chars.
 *                   </p>
 */
public static function strlen(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($str === '') {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // "mb_strlen" and "\iconv_strlen" returns wrong length,
        // if invalid characters are found in $str
        $str = self::clean($str);
    }

    $is_utf8 = $encoding === 'UTF-8';

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
        return $is_utf8
            ? @\mb_strlen($str)
            : @\mb_strlen($str, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return \strlen($str);
    }

    if (
        !$is_utf8
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
        $iconv_length = \iconv_strlen($str, $encoding);
        if ($iconv_length !== false) {
            return $iconv_length;
        }
    }

    //
    // fallback via intl
    //

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($is_utf8 && self::$SUPPORT['intl'] === true) {
        $grapheme_length = \grapheme_strlen($str);
        if ($grapheme_length !== null) {
            return $grapheme_length;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($str)) {
        return \strlen($str);
    }

    //
    // fallback via vanilla php
    //

    \preg_match_all('/./us', $str, $chars);
    $char_count = \count($chars[0]);

    // nothing matched although the string is not empty: report failure
    return $char_count === 0 ? false : $char_count;
}
9638
|
|
|
|
9639
|
|
|
/**
 * Get string length in byte.
 *
 * Uses the native strlen(), unless self::$SUPPORT['mbstring_func_overload']
 * is set; in that case mb_strlen() with the 8-bit "CP850" encoding is used.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return int
 */
public static function strlen_in_byte(string $str): int
{
    if ($str === '') {
        return 0;
    }

    // "mb_" is available if overload is used, so use it ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850') // 8-BIT
        : \strlen($str);
}
9661
|
|
|
|
9662
|
|
|
/**
 * Case-insensitive string comparisons using a "natural order" algorithm.
 *
 * INFO: natural order version of UTF8::strcasecmp(); both strings are run
 * through UTF8::strtocasefold() and then compared with UTF8::strnatcmp().
 *
 * EXAMPLES: <code>
 * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
 * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
 *
 * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
 * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
 * </code>
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
 *             <strong>0</strong> if they are equal
 */
public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
{
    $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded_first, $folded_second);
}
9693
|
|
|
|
9694
|
|
|
/**
 * String comparisons using a "natural order" algorithm
 *
 * INFO: natural order version of UTF8::strcmp(); identical strings compare as
 * equal right away, otherwise both are passed through UTF8::strtonatfold()
 * and compared with the native strnatcmp().
 *
 * EXAMPLES: <code>
 * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
 * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
 *
 * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
 * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
 * </code>
 *
 * @see http://php.net/manual/en/function.strnatcmp.php
 *
 * @param string $str1 <p>The first string.</p>
 * @param string $str2 <p>The second string.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strnatcmp(string $str1, string $str2): int
{
    if ($str1 === $str2) {
        return 0;
    }

    $folded_first = (string) self::strtonatfold($str1);
    $folded_second = (string) self::strtonatfold($str2);

    return \strnatcmp($folded_first, $folded_second);
}
9730
|
|
|
|
9731
|
|
|
/**
 * Case-insensitive string comparison of the first n characters.
 *
 * Both strings are run through UTF8::strtocasefold() and the results are
 * compared with UTF8::strncmp() using $len.
 *
 * EXAMPLE: <code>
 * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
 * </code>
 *
 * @see http://php.net/manual/en/function.strncasecmp.php
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param int    $len      <p>The length of strings to be used in the comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
 *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strncasecmp(
    string $str1,
    string $str2,
    int $len,
    string $encoding = 'UTF-8'
): int {
    $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

    // INFO: $encoding is only used for the case folding; it is not forwarded to strncmp()
    return self::strncmp($folded_first, $folded_second, $len);
}
9764
|
|
|
|
9765
|
|
|
/**
 * Compare two strings, looking only at their first n characters.
 *
 * EXAMPLE: <code>
 * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
 * </code>
 *
 * @see http://php.net/manual/en/function.strncmp.php
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param int    $len      <p>Number of characters to use in the comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
 *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strncmp(
    string $str1,
    string $str2,
    int $len,
    string $encoding = 'UTF-8'
): int {
    // the two most common charset names are used as-is, everything else gets normalized
    $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$is_known_encoding) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($encoding !== 'UTF-8') {
        // non UTF-8 charsets are truncated via the charset-aware helper of this class
        return self::strcmp(
            (string) self::substr($str1, 0, $len, $encoding),
            (string) self::substr($str2, 0, $len, $encoding)
        );
    }

    return self::strcmp(
        (string) \mb_substr($str1, 0, $len),
        (string) \mb_substr($str2, 0, $len)
    );
}
9806
|
|
|
|
9807
|
|
|
/**
 * Search a string for the first occurrence of any character out of a set.
 *
 * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
 *
 * @see http://php.net/manual/en/function.strpbrk.php
 *
 * @param string $haystack  <p>The string where char_list is looked for.</p>
 * @param string $char_list <p>This parameter is case-sensitive.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The string starting from the character found, or false if it is not found.</p>
 */
public static function strpbrk(string $haystack, string $char_list)
{
    if ($haystack !== '' && $char_list !== '') {
        // the character list is turned into a regex fragment by rxClass()
        $pattern = '/' . self::rxClass($char_list) . '/us';

        if (\preg_match($pattern, $haystack, $found)) {
            // byte position of the matched text, used to cut the rest of the haystack
            $byte_offset = (int) \strpos($haystack, $found[0]);

            return \substr($haystack, $byte_offset);
        }
    }

    return false;
}
9834
|
|
|
|
9835
|
|
|
/**
 * Find the position of the first occurrence of a substring in a string.
 *
 * INFO: use UTF8::strpos_in_byte() for the byte-length
 *
 * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
 *
 * @see http://php.net/manual/en/function.mb-strpos.php
 *
 * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
 * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
 * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
 * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
 *                   string.<br> If needle is not found it returns false.
 */
public static function strpos(
    string $haystack,
    $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // empty haystack: PHP >= 8 reports position 0 for an empty needle, older versions always report "not found"
    if ($haystack === '') {
        if (\PHP_VERSION_ID >= 80000) {
            if ($needle === '') {
                return 0;
            }
        } else {
            return false;
        }
    }

    // iconv and mbstring do not support integer $needle
    if ((int) $needle === $needle) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    // same check again, now that an integer needle has been converted into a string
    if ($haystack === '') {
        if (\PHP_VERSION_ID >= 80000 && $needle === '') {
            return 0;
        }

        return false;
    }

    if ($needle === '' && \PHP_VERSION_ID < 80000) {
        return false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset);
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
        return @\mb_strpos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback for binary || ascii only
    //
    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
        return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - is is only a warning
         */
        \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
        if ($return_tmp !== false) {
            return $return_tmp;
        }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
        // ignore invalid negative offset to keep compatibility
        // with php < 5.5.35, < 5.6.21, < 7.0.6
        $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
        if ($return_tmp !== false) {
            return $return_tmp;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
        return @\strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    if ($offset < 0) {
        // A negative offset counts from the end of the string: convert it into an
        // absolute character index first, so that a hit inside the tail can be
        // mapped back to its position in the complete haystack.
        $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
    }

    $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
    if ($haystack_tmp === false) {
        $haystack_tmp = '';
    }
    $haystack = (string) $haystack_tmp;

    // byte position of the needle inside the (already shortened) haystack
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
        return false;
    }

    // convert the byte position into a character count and re-add the skipped characters
    $chars_before_needle = $pos > 0
        ? (int) self::strlen(\substr($haystack, 0, $pos), $encoding)
        : 0;

    return $offset + $chars_before_needle;
}
10008
|
|
|
|
10009
|
|
|
/**
 * Find the byte position of the first occurrence of a substring in a string.
 *
 * @param string $haystack <p>
 *                         The string being checked.
 *                         </p>
 * @param string $needle   <p>
 *                         The string to find in haystack.
 *                         </p>
 * @param int    $offset   [optional] <p>
 *                         The search offset. If it is not specified, 0 is used.
 *                         </p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The numeric position of the first occurrence of needle in the
 *                   haystack string. If needle is not found, it returns false.</p>
 */
public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    $has_input = $haystack !== '' && $needle !== '';
    if (!$has_input) {
        return false;
    }

    // "mb_" is available if overload is used, so use it with a single-byte charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
        : \strpos($haystack, $needle, $offset);
}
10041
|
|
|
|
10042
|
|
|
/**
 * Find the byte position of the first occurrence of a substring in a string, case-insensitive.
 *
 * @param string $haystack <p>
 *                         The string being checked.
 *                         </p>
 * @param string $needle   <p>
 *                         The string to find in haystack.
 *                         </p>
 * @param int    $offset   [optional] <p>
 *                         The search offset. If it is not specified, 0 is used.
 *                         </p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The numeric position of the first occurrence of needle in the
 *                   haystack string. If needle is not found, it returns false.</p>
 */
public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    $has_input = $haystack !== '' && $needle !== '';
    if (!$has_input) {
        return false;
    }

    // "mb_" is available if overload is used, so use it with a single-byte charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_stripos($haystack, $needle, $offset, 'CP850') // 8-BIT
        : \stripos($haystack, $needle, $offset);
}
10074
|
|
|
|
10075
|
|
|
/**
 * Find the last occurrence of a character in a string within another.
 *
 * INFO: the fallbacks without "mbstring" only search for the first character of the needle.
 *
 * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
 *
 * @see http://php.net/manual/en/function.mb-strrchr.php
 *
 * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
 * @param string $needle        <p>The string to find in haystack</p>
 * @param bool   $before_needle [optional] <p>
 *                              Determines which portion of haystack
 *                              this function returns.
 *                              If set to true, it returns all of haystack
 *                              from the beginning to the last occurrence of needle.
 *                              If set to false, it returns all of haystack
 *                              from the last occurrence of needle to the end,
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The portion of haystack or false if needle is not found.</p>
 */
public static function strrchr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($haystack === '' || $needle === '') {
        return false;
    }

    $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$is_known_encoding) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strrchr($haystack, $needle, $before_needle)
            : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
    if ($is_single_byte && !$before_needle) {
        return \strrchr($haystack, $needle);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - is is only a warning
         */
        \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via iconv (if available) or via vanilla php
    //

    // both fallbacks only look for the first character of the needle
    $first_char = self::substr($needle, 0, 1, $encoding);
    if ($first_char === false) {
        return false;
    }

    $pos = self::$SUPPORT['iconv'] === true
        ? \iconv_strrpos($haystack, $first_char, $encoding)
        : self::strrpos($haystack, $first_char, 0, $encoding);
    if ($pos === false) {
        return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $pos, $encoding)
        : self::substr($haystack, $pos, null, $encoding);
}
10205
|
|
|
|
10206
|
|
|
/**
 * Reverse the order of the characters in a string.
 *
 * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
 *
 * @param string $str      <p>The input string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with characters in the reverse sequence.</p>
 */
public static function strrev(string $str, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    // emoji are encoded before the reversal and decoded again on the result
    $str = self::emoji_encode($str, true);
    $result = '';

    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
            $char = self::substr($str, $index, 1, $encoding);
            if ($char !== false) {
                $result .= $char;
            }
        }
    } elseif (self::$SUPPORT['intl'] === true) {
        // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
        for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
            $char = \grapheme_substr($str, $index, 1);
            if ($char !== false) {
                $result .= $char;
            }
        }
    } else {
        for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
            $char = \mb_substr($str, $index, 1);
            if ($char !== false) {
                $result .= $char;
            }
        }
    }

    return self::emoji_decode($result, true);
}
10263
|
|
|
|
10264
|
|
|
/**
 * Find the last occurrence of a character in a string within another, case-insensitive.
 *
 * INFO: the fallback without "mbstring" only searches for the first character of the needle.
 *
 * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
 *
 * @see http://php.net/manual/en/function.mb-strrichr.php
 *
 * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
 * @param string $needle        <p>The string to find in haystack.</p>
 * @param bool   $before_needle [optional] <p>
 *                              Determines which portion of haystack
 *                              this function returns.
 *                              If set to true, it returns all of haystack
 *                              from the beginning to the last occurrence of needle.
 *                              If set to false, it returns all of haystack
 *                              from the last occurrence of needle to the end,
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The portion of haystack or<br>false if needle is not found.</p>
 */
public static function strrichr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($haystack === '' || $needle === '') {
        return false;
    }

    $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$is_known_encoding) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strrichr($haystack, $needle, $before_needle)
            : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via vanilla php
    //

    // only the first character of the needle is searched for in this fallback
    $first_char = self::substr($needle, 0, 1, $encoding);
    if ($first_char === false) {
        return false;
    }

    $pos = self::strripos($haystack, $first_char, 0, $encoding);
    if ($pos === false) {
        return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $pos, $encoding)
        : self::substr($haystack, $pos, null, $encoding);
}
10344
|
|
|
|
10345
|
|
|
/**
 * Find the position of the last occurrence of a substring in a string, case-insensitive.
 *
 * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
 *
 * @param string     $haystack   <p>The string to look in.</p>
 * @param int|string $needle     <p>The string to look for.</p>
 * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
 * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
 *                   string.<br>If needle is not found, it returns false.</p>
 */
public static function strripos(
    string $haystack,
    $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // empty haystack: PHP >= 8 reports position 0 for an empty needle, older versions always report "not found"
    if ($haystack === '') {
        if (\PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($needle === '') {
            return 0;
        }
    }

    // iconv and mbstring do not support integer $needle
    if ($needle === (int) $needle && $needle >= 0) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    // same check again, now that an integer needle has been converted into a string
    if ($haystack === '') {
        return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
    }

    if (\PHP_VERSION_ID < 80000 && $needle === '') {
        return false;
    }

    if ($clean_utf8) {
        // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$is_known_encoding) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strripos($haystack, $needle, $offset)
            : \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return \strripos($haystack, $needle, $offset);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - is is only a warning
         */
        \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    $can_use_intl = self::$SUPPORT['intl'] === true
        && $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        && $offset >= 0; // grapheme_strripos() can't handle negative offset
    if ($can_use_intl) {
        $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
        if ($grapheme_pos !== false) {
            return $grapheme_pos;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \strripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // case-fold both strings, then delegate to the case-sensitive search
    $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
    $folded_needle = self::strtocasefold($needle, true, false, $encoding);

    return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
}
10476
|
|
|
|
10477
|
|
|
/**
 * Find the byte position of the last occurrence of a string within another, case-insensitive.
 *
 * @param string $haystack <p>
 *                         The string from which to get the position of the last occurrence
 *                         of needle.
 *                         </p>
 * @param string $needle   <p>
 *                         The string to find in haystack.
 *                         </p>
 * @param int    $offset   [optional] <p>
 *                         The position in haystack
 *                         to start searching.
 *                         </p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>Return the numeric position of the last occurrence of needle in the
 *                   haystack string, or false if needle is not found.</p>
 */
public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    $has_input = $haystack !== '' && $needle !== '';
    if (!$has_input) {
        return false;
    }

    // "mb_" is available if overload is used, so use it with a single-byte charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
        : \strripos($haystack, $needle, $offset);
}
10511
|
|
|
|
10512
|
|
|
/** |
10513
|
|
|
* Find the position of the last occurrence of a substring in a string. |
10514
|
|
|
* |
10515
|
|
|
* EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code> |
10516
|
|
|
* |
10517
|
|
|
* @see http://php.net/manual/en/function.mb-strrpos.php |
10518
|
|
|
* |
10519
|
|
|
* @param string $haystack <p>The string being checked, for the last occurrence of needle</p> |
10520
|
|
|
* @param int|string $needle <p>The string to find in haystack.<br>Or a code point as int.</p> |
10521
|
|
|
* @param int $offset [optional] <p>May be specified to begin searching an arbitrary number of characters |
10522
|
|
|
* into the string. Negative values will stop searching at an arbitrary point prior to |
10523
|
|
|
* the end of the string. |
10524
|
|
|
* </p> |
10525
|
|
|
* @param string $encoding [optional] <p>Set the charset.</p> |
10526
|
|
|
* @param bool $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p> |
10527
|
|
|
* |
10528
|
|
|
* @psalm-pure |
10529
|
|
|
* |
10530
|
|
|
* @return false|int |
10531
|
|
|
* <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack |
10532
|
|
|
* string.<br>If needle is not found, it returns false.</p> |
10533
|
|
|
*/ |
10534
|
35 |
|
public static function strrpos(
    string $haystack,
    $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // Before PHP 8 an empty haystack never matches, whatever the needle is.
    if ($haystack === '' && \PHP_VERSION_ID < 80000) {
        return false;
    }

    // iconv and mbstring do not support an integer $needle:
    // a non-negative integer is converted via self::chr() first.
    if ($needle === (int) $needle && $needle >= 0) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    if ($haystack === '') {
        // Only reachable on PHP >= 8: an empty needle is found at position 0
        // of an empty haystack, everything else is "not found".
        return $needle === '' ? 0 : false;
    }

    // Before PHP 8 an empty needle is never found.
    if ($needle === '' && \PHP_VERSION_ID < 80000) {
        return false;
    }

    if ($clean_utf8) {
        // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strrpos($haystack, $needle, $offset)
            : \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'ASCII' || $encoding === 'CP850') {
        return \strrpos($haystack, $needle, $offset);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        self::$SUPPORT['intl'] === true
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
    ) {
        $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
        if ($grapheme_pos !== false) {
            return $grapheme_pos;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \strrpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // Cut the haystack down to the searched region; a failed substr() is
    // treated like an empty haystack.
    // NOTE(review): for a negative $offset the haystack is shortened by
    // |$offset| characters, so a multi-character needle that starts before
    // the cut but ends after it is not found here. Native strrpos() appears
    // to accept such a match - confirm before relying on this fallback.
    if ($offset > 0) {
        $haystack = (string) self::substr($haystack, $offset);
    } elseif ($offset < 0) {
        $haystack = (string) self::substr($haystack, 0, $offset);
        $offset = 0;
    }

    $byte_pos = \strrpos($haystack, $needle);
    if ($byte_pos === false) {
        return false;
    }

    /** @var false|string $prefix - needed for PhpStan (stubs error) */
    $prefix = \substr($haystack, 0, $byte_pos);
    if ($prefix === false) {
        return false;
    }

    // Convert the byte position into a character position by measuring the
    // part in front of the match, then add the skipped offset back.
    return $offset + (int) self::strlen($prefix);
}
10671
|
|
|
|
10672
|
|
|
/**
 * Find the position of the last occurrence of a substring in a string,
 * working on the raw bytes.
 *
 * @param string $haystack <p>The string being searched for the last occurrence of needle.</p>
 * @param string $needle   <p>The string to find in haystack.</p>
 * @param int    $offset   [optional] <p>Passed through unchanged to the underlying "strrpos()" / "mb_strrpos()" call.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The numeric position of the last occurrence of needle in the
 *                   haystack string. Returns false if needle is not found or if
 *                   haystack or needle is an empty string.</p>
 */
public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // "mb_" is available if overload is used, so use it ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strrpos($haystack, $needle, $offset, 'CP850') // 8-BIT
        : \strrpos($haystack, $needle, $offset);
}
10706
|
|
|
|
10707
|
|
|
/**
 * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
 * mask.
 *
 * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
 *
 * @param string   $str      <p>The input string.</p>
 * @param string   $mask     <p>The mask of chars</p>
 * @param int      $offset   [optional] <p>Start of the examined part of the input string.</p>
 * @param int|null $length   [optional] <p>Length of the examined part of the input string.</p>
 * @param string   $encoding [optional] <p>Set the charset.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The length of the leading segment made up of mask characters,
 *                   or 0 if the (sliced) string or the mask is empty or nothing matches.</p>
 */
public static function strspn(
    string $str,
    string $mask,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8'
) {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Reduce the input to the requested slice before matching.
    if ($offset || $length !== null) {
        if ($encoding !== 'UTF-8') {
            $str = (string) self::substr($str, $offset, $length, $encoding);
        } elseif ($length !== null) {
            $str = (string) \mb_substr($str, $offset, $length);
        } else {
            $str = (string) \mb_substr($str, $offset);
        }
    }

    if ($mask === '' || $str === '') {
        return 0;
    }

    // Match the longest run of mask characters anchored at the start.
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
        return 0;
    }

    return (int) self::strlen($matches[0], $encoding);
}
10754
|
|
|
|
10755
|
|
|
/**
 * Returns part of haystack string from the first occurrence of needle to the end of haystack.
 *
 * EXAMPLE: <code>
 * $str = 'iñtërnâtiônàlizætiøn';
 * $search = 'nât';
 *
 * UTF8::strstr($str, $search); // 'nâtiônàlizætiøn'
 * UTF8::strstr($str, $search, true); // 'iñtër'
 * </code>
 *
 * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
 * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
 * @param bool   $before_needle [optional] <p>
 *                              If <b>TRUE</b>, strstr() returns the part of the
 *                              haystack before the first occurrence of the needle (excluding the needle).
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.
 *                      With an empty needle the result is <strong>false</strong> before PHP 8;
 *                      on PHP >= 8 it is the whole haystack, or an empty string if
 *                      $before_needle is set.</p>
 */
public static function strstr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($haystack === '') {
        if (\PHP_VERSION_ID >= 80000 && $needle === '') {
            return '';
        }

        return false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($needle === '') {
        if (\PHP_VERSION_ID >= 80000) {
            // PHP 8 finds an empty needle at position 0: everything is
            // "after" it and nothing is "before" it.
            return $before_needle ? '' : $haystack;
        }

        return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            return \mb_strstr($haystack, $needle, $before_needle);
        }

        return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
        return \strstr($haystack, $needle, $before_needle);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $return_tmp = \grapheme_strstr($haystack, $needle, $before_needle);
        if ($return_tmp !== false) {
            return $return_tmp;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \strstr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // Lazily capture everything in front of the first occurrence of the needle.
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
        return false;
    }

    if ($before_needle) {
        return $match[1];
    }

    // Skip the captured prefix to get the part starting at the needle.
    return self::substr($haystack, (int) self::strlen($match[1]));
}
10888
|
|
|
|
10889
|
|
|
/**
 * Finds first occurrence of a string within another, working on the raw bytes.
 *
 * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
 * @param string $needle        <p>The string to find in haystack.</p>
 * @param bool   $before_needle [optional] <p>
 *                              Determines which portion of haystack this function returns.
 *                              If set to true, it returns all of haystack
 *                              from the beginning to the first occurrence of needle.
 *                              If set to false, it returns all of haystack
 *                              from the first occurrence of needle to the end.
 *                              </p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The portion of haystack, or false if needle is not found
 *                      or if haystack or needle is an empty string.</p>
 */
public static function strstr_in_byte(
    string $haystack,
    string $needle,
    bool $before_needle = false
) {
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // "mb_" is available if overload is used, so use it ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strstr($haystack, $needle, $before_needle, 'CP850') // 8-BIT
        : \strstr($haystack, $needle, $before_needle);
}
10930
|
|
|
|
10931
|
|
|
/**
 * Unicode transformation for case-less matching.
 *
 * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
 *
 * @see http://unicode.org/reports/tr21/tr21-5.html
 *
 * @param string      $str        <p>The input string.</p>
 * @param bool        $full       [optional] <p>
 *                                <b>true</b>, replace full case folding chars (default)<br>
 *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
 *                                </p>
 * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string      $encoding   [optional] <p>Set the charset.</p>
 * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
 *                                is for some languages better ...</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function strtocasefold(
    string $str,
    bool $full = true,
    bool $clean_utf8 = false,
    string $encoding = 'UTF-8',
    string $lang = null,
    bool $lower = true
): string {
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // strip invalid UTF-8 sequences before any case conversion happens
        $str = self::clean($str);
    }

    $folded = self::fixStrCaseHelper($str, $lower, $full);

    // Without a language and with plain UTF-8 the mbstring function is called directly.
    if ($encoding === 'UTF-8' && $lang === null) {
        return $lower ? \mb_strtolower($folded) : \mb_strtoupper($folded);
    }

    // Otherwise delegate to the encoding / language aware helpers.
    return $lower
        ? self::strtolower($folded, $encoding, false, $lang)
        : self::strtoupper($folded, $encoding, false, $lang);
}
10987
|
|
|
|
10988
|
|
|
/**
 * Make a string lowercase.
 *
 * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
 *
 * @see http://php.net/manual/en/function.mb-strtolower.php
 *
 * @param string      $str                           <p>The string being lowercased.</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>String with all alphabetic characters converted to lowercase.</p>
 */
public static function strtolower(
    $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    $str = (string) $str;
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // strip invalid UTF-8 sequences before the case conversion
        $str = self::clean($str);
    }

    // hack for old php version or for the polyfill ...
    if ($try_to_keep_the_string_length) {
        $str = self::fixStrCaseHelper($str, true);
    }

    // fast path: plain UTF-8 without language specific rules
    if ($encoding === 'UTF-8' && $lang === null) {
        return \mb_strtolower($str);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ($lang === null) {
        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }

    if (self::$SUPPORT['intl'] !== true) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

        // always fallback via symfony polyfill
        return \mb_strtolower($str, $encoding);
    }

    // The transliterator id list is loaded once and cached on the class.
    if (self::$INTL_TRANSLITERATOR_LIST === null) {
        self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
    }

    $transliterator_id = $lang . '-Lower';
    if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

        // unknown language: use the generic transliterator instead
        $transliterator_id = 'Any-Lower';
    }

    return (string) \transliterator_transliterate($transliterator_id, $str);
}
11067
|
|
|
|
11068
|
|
|
/**
 * Make a string uppercase.
 *
 * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
 *
 * @see http://php.net/manual/en/function.mb-strtoupper.php
 *
 * @param string      $str                           <p>The string being uppercased.</p>
 * @param string      $encoding                      [optional] <p>Set the charset.</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>String with all alphabetic characters converted to uppercase.</p>
 */
public static function strtoupper(
    $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    // init
    $str = (string) $str;

    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // strip invalid UTF-8 sequences before the case conversion
        $str = self::clean($str);
    }

    // hack for old php version or for the polyfill ...
    if ($try_to_keep_the_string_length) {
        $str = self::fixStrCaseHelper($str);
    }

    // fast path: plain UTF-8 without language specific rules
    if ($lang === null && $encoding === 'UTF-8') {
        return \mb_strtoupper($str);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ($lang !== null) {
        if (self::$SUPPORT['intl'] === true) {
            // The transliterator id list is loaded once and cached on the class.
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            $language_code = $lang . '-Upper';
            if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                // intl is available here, only the requested language is unknown.
                /**
                 * @psalm-suppress ImpureFunctionCall - it is only a warning
                 */
                \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                // unknown language: use the generic transliterator instead
                $language_code = 'Any-Upper';
            }

            return (string) \transliterator_transliterate($language_code, $str);
        }

        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
}
11147
|
|
|
|
11148
|
|
|
/**
 * Translate characters or replace sub-strings.
 *
 * EXAMPLE:
 * <code>
 * $array = [
 *     'Hello'   => '○●◎',
 *     '中文空白' => 'earth',
 * ];
 * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
 * </code>
 *
 * @see http://php.net/manual/en/function.strtr.php
 *
 * @param string          $str  <p>The string being translated.</p>
 * @param string|string[] $from <p>The string replacing from.</p>
 * @param string|string[] $to   [optional] <p>The string being translated to.</p>
 *
 * @throws \InvalidArgumentException if "from" and "to" cannot be combined into replacement pairs
 *
 * @psalm-pure
 *
 * @return string
 *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
 *                to the corresponding character in "to".</p>
 */
public static function strtr(string $str, $from, $to = ''): string
{
    if ($str === '') {
        return '';
    }

    if ($from === $to) {
        return $str;
    }

    if ($to !== '') {
        if (!\is_array($from)) {
            $from = self::str_split($from);
        }

        if (!\is_array($to)) {
            $to = self::str_split($to);
        }

        // Trim the longer list so both sides have the same number of entries.
        $count_from = \count($from);
        $count_to = \count($to);
        if ($count_from > $count_to) {
            $from = \array_slice($from, 0, $count_to);
        } elseif ($count_from < $count_to) {
            $to = \array_slice($to, 0, $count_from);
        }

        // Keep $from untouched until the check has passed, so the exception
        // message can still show the offending input.
        $pairs = \array_combine($from, $to);
        if ($pairs === false) {
            throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
        }

        $from = $pairs;
    }

    // Only reachable with a string $from and the default empty $to.
    if (\is_string($from)) {
        return \str_replace($from, $to, $str);
    }

    return \strtr($str, $from);
}
11214
|
|
|
|
11215
|
|
|
/**
 * Return the width of a string.
 *
 * INFO: use UTF8::strlen() for the length of the string
 *
 * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
 *
 * @param string $str        <p>The input string.</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return int
 */
public static function strwidth(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
): int {
    if ($str === '') {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // iconv and mbstring are not tolerant to invalid encoding
        // further, their behaviour is inconsistent with that of PHP's substr
        $str = self::clean($str);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strwidth($str)
            : \mb_strwidth($str, $encoding);
    }

    //
    // fallback via vanilla php
    //

    if ($encoding !== 'UTF-8') {
        $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // Remove every character from the listed ranges and count the removals:
    // each removed character counts as two columns, each remaining one as one.
    $wide_count = 0;
    $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

    return ($wide_count << 1) + (int) self::strlen($remaining);
}
11274
|
|
|
|
11275
|
|
|
/** |
11276
|
|
|
* Get part of a string. |
11277
|
|
|
* |
11278
|
|
|
* EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code> |
11279
|
|
|
* |
11280
|
|
|
* @see http://php.net/manual/en/function.mb-substr.php |
11281
|
|
|
* |
11282
|
|
|
* @param string $str <p>The string being checked.</p> |
11283
|
|
|
* @param int $offset <p>The first position used in str.</p> |
11284
|
|
|
* @param int|null $length [optional] <p>The maximum length of the returned string.</p> |
11285
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
11286
|
|
|
* @param bool $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p> |
11287
|
|
|
* |
11288
|
|
|
* @psalm-pure |
11289
|
|
|
* |
11290
|
|
|
* @return false|string |
11291
|
|
|
* The portion of <i>str</i> specified by the <i>offset</i> and |
11292
|
|
|
* <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i> |
11293
|
|
|
* characters long, <b>FALSE</b> will be returned. |
11294
|
|
|
*/ |
11295
|
172 |
|
public static function substr( |
11296
|
|
|
string $str, |
11297
|
|
|
int $offset = 0, |
11298
|
|
|
int $length = null, |
11299
|
|
|
string $encoding = 'UTF-8', |
11300
|
|
|
bool $clean_utf8 = false |
11301
|
|
|
) { |
11302
|
|
|
// empty string |
11303
|
172 |
|
if ($str === '' || $length === 0) { |
11304
|
8 |
|
return ''; |
11305
|
|
|
} |
11306
|
|
|
|
11307
|
168 |
|
if ($clean_utf8) { |
11308
|
|
|
// iconv and mbstring are not tolerant to invalid encoding |
11309
|
|
|
// further, their behaviour is inconsistent with that of PHP's substr |
11310
|
2 |
|
$str = self::clean($str); |
11311
|
|
|
} |
11312
|
|
|
|
11313
|
|
|
// whole string |
11314
|
168 |
|
if (!$offset && $length === null) { |
11315
|
7 |
|
return $str; |
11316
|
|
|
} |
11317
|
|
|
|
11318
|
163 |
|
if ($encoding !== 'UTF-8' && $encoding !== 'CP850') { |
11319
|
19 |
|
$encoding = self::normalize_encoding($encoding, 'UTF-8'); |
11320
|
|
|
} |
11321
|
|
|
|
11322
|
|
|
// |
11323
|
|
|
// fallback via mbstring |
11324
|
|
|
// |
11325
|
|
|
|
11326
|
163 |
|
if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') { |
11327
|
161 |
|
if ($length === null) { |
11328
|
64 |
|
return \mb_substr($str, $offset); |
11329
|
|
|
} |
11330
|
|
|
|
11331
|
102 |
|
return \mb_substr($str, $offset, $length); |
11332
|
|
|
} |
11333
|
|
|
|
11334
|
|
|
// |
11335
|
|
|
// fallback for binary || ascii only |
11336
|
|
|
// |
11337
|
|
|
|
11338
|
|
|
if ( |
11339
|
4 |
|
$encoding === 'CP850' |
11340
|
|
|
|| |
11341
|
4 |
|
$encoding === 'ASCII' |
11342
|
|
|
) { |
11343
|
|
|
if ($length === null) { |
11344
|
|
|
return \substr($str, $offset); |
11345
|
|
|
} |
11346
|
|
|
|
11347
|
|
|
return \substr($str, $offset, $length); |
11348
|
|
|
} |
11349
|
|
|
|
11350
|
|
|
// otherwise we need the string-length |
11351
|
4 |
|
$str_length = 0; |
11352
|
4 |
|
if ($offset || $length === null) { |
11353
|
4 |
|
$str_length = self::strlen($str, $encoding); |
11354
|
|
|
} |
11355
|
|
|
|
11356
|
|
|
// e.g.: invalid chars + mbstring not installed |
11357
|
4 |
|
if ($str_length === false) { |
11358
|
|
|
return false; |
11359
|
|
|
} |
11360
|
|
|
|
11361
|
|
|
// empty string |
11362
|
4 |
|
if ($offset === $str_length && !$length) { |
|
|
|
|
11363
|
|
|
return ''; |
11364
|
|
|
} |
11365
|
|
|
|
11366
|
|
|
// impossible |
11367
|
4 |
|
if ($offset && $offset > $str_length) { |
11368
|
|
|
return ''; |
11369
|
|
|
} |
11370
|
|
|
|
11371
|
4 |
|
$length = $length ?? $str_length; |
11372
|
|
|
|
11373
|
|
|
if ( |
11374
|
4 |
|
$encoding !== 'UTF-8' |
11375
|
|
|
&& |
11376
|
4 |
|
self::$SUPPORT['mbstring'] === false |
11377
|
|
|
) { |
11378
|
|
|
/** |
11379
|
|
|
* @psalm-suppress ImpureFunctionCall - is is only a warning |
11380
|
|
|
*/ |
11381
|
2 |
|
\trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING); |
11382
|
|
|
} |
11383
|
|
|
|
11384
|
|
|
// |
11385
|
|
|
// fallback via intl |
11386
|
|
|
// |
11387
|
|
|
|
11388
|
|
|
if ( |
11389
|
4 |
|
$encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings |
11390
|
|
|
&& |
11391
|
4 |
|
$offset >= 0 // grapheme_substr() can't handle negative offset |
11392
|
|
|
&& |
11393
|
4 |
|
self::$SUPPORT['intl'] === true |
11394
|
|
|
) { |
11395
|
|
|
$return_tmp = \grapheme_substr($str, $offset, $length); |
11396
|
|
|
if ($return_tmp !== false) { |
11397
|
|
|
return $return_tmp; |
11398
|
|
|
} |
11399
|
|
|
} |
11400
|
|
|
|
11401
|
|
|
// |
11402
|
|
|
// fallback via iconv |
11403
|
|
|
// |
11404
|
|
|
|
11405
|
|
|
if ( |
11406
|
4 |
|
$length >= 0 // "iconv_substr()" can't handle negative length |
11407
|
|
|
&& |
11408
|
4 |
|
self::$SUPPORT['iconv'] === true |
11409
|
|
|
) { |
11410
|
|
|
$return_tmp = \iconv_substr($str, $offset, $length); |
11411
|
|
|
if ($return_tmp !== false) { |
11412
|
|
|
return $return_tmp; |
11413
|
|
|
} |
11414
|
|
|
} |
11415
|
|
|
|
11416
|
|
|
// |
11417
|
|
|
// fallback for ascii only |
11418
|
|
|
// |
11419
|
|
|
|
11420
|
4 |
|
if (ASCII::is_ascii($str)) { |
11421
|
|
|
return \substr($str, $offset, $length); |
11422
|
|
|
} |
11423
|
|
|
|
11424
|
|
|
// |
11425
|
|
|
// fallback via vanilla php |
11426
|
|
|
// |
11427
|
|
|
|
11428
|
|
|
// split to array, and remove invalid characters |
11429
|
|
|
// && |
11430
|
|
|
// extract relevant part, and join to make string again |
11431
|
4 |
|
return \implode('', \array_slice(self::str_split($str), $offset, $length)); |
11432
|
|
|
} |
11433
|
|
|
|
11434
|
|
|
/**
 * Compare a character-based portion of $str1 with $str2.
 *
 * When an offset or a length is given, $str1 is first reduced to that
 * portion and $str2 is truncated to the same number of characters. The
 * two resulting strings are then compared via self::strcmp() or, when
 * $case_insensitivity is true, via self::strcasecmp().
 *
 * EXAMPLE: <code>
 * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
 * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
 * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
 * </code>
 *
 * @param string   $str1               <p>The main string being compared.</p>
 * @param string   $str2               <p>The secondary string being compared.</p>
 * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
 *                                     counting from the end of the string.</p>
 * @param int|null $length             [optional] <p>The number of characters of $str1 to compare. With null,
 *                                     everything from $offset to the end of $str1 is used.</p>
 * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
 *                                     insensitive.</p>
 * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
 *             <strong>0</strong> if they are equal
 */
public static function substr_compare(
    string $str1,
    string $str2,
    int $offset = 0,
    int $length = null,
    bool $case_insensitivity = false,
    string $encoding = 'UTF-8'
): int {
    // Only cut the strings if the caller actually asked for a portion.
    if ($offset !== 0 || $length !== null) {
        if ($encoding === 'UTF-8') {
            if ($length === null) {
                $str1 = (string) \mb_substr($str1, $offset);
            } else {
                $str1 = (string) \mb_substr($str1, $offset, $length);
            }

            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);

            // Measure $str1 in the same encoding that is used for cutting $str2,
            // otherwise the two portions can end up with different character counts.
            $str1_length = (int) self::strlen($str1, $encoding);
            $str2 = (string) self::substr($str2, 0, $str1_length, $encoding);
        }
    }

    if ($case_insensitivity) {
        return self::strcasecmp($str1, $str2, $encoding);
    }

    return self::strcmp($str1, $str2);
}
11495
|
|
|
|
11496
|
|
|
/**
 * Count the number of substring occurrences.
 *
 * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
 *
 * @see http://php.net/manual/en/function.substr-count.php
 *
 * @param string   $haystack   <p>The string to search in.</p>
 * @param string   $needle     <p>The substring to search for.</p>
 * @param int      $offset     [optional] <p>The offset (in characters) where to start counting.</p>
 * @param int|null $length     [optional] <p>
 *                             The maximum length (in characters) after the specified offset to search for
 *                             the substring. A length of 0 always yields 0.
 *                             </p>
 * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The number of occurrences, or false if $needle is empty or the length of
 *                   $haystack could not be determined.</p>
 */
public static function substr_count(
    string $haystack,
    string $needle,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($needle === '') {
        return false;
    }

    // Nothing to search in, or an empty search window: no occurrences.
    if ($haystack === '' || $length === 0) {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // Reduce the haystack to the requested window before counting.
    // NOTE(review): a negative $length combined with $offset === 0 does not
    // enter this branch, so the whole haystack is searched - confirm that
    // this is intended.
    if ($offset || $length > 0) {
        if ($length === null) {
            $length_tmp = self::strlen($haystack, $encoding);
            if ($length_tmp === false) {
                return false;
            }
            $length = $length_tmp;
        }

        if ($encoding === 'UTF-8') {
            $haystack = (string) \mb_substr($haystack, $offset, $length);
        } else {
            $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
        }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            return \mb_substr_count($haystack, $needle);
        }

        return \mb_substr_count($haystack, $needle, $encoding);
    }

    // Fallback without mbstring: count the matches of the quoted needle.
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

    return \count($matches);
}
11593
|
|
|
|
11594
|
|
|
/**
 * Count the number of substring occurrences, working on bytes.
 *
 * Without "mbstring.func_overload" the call is forwarded to the native
 * substr_count(). With the overload active, the haystack is cut with
 * substr() first and the occurrences are counted via mb_substr_count()
 * using the 8-bit "CP850" encoding.
 *
 * @param string   $haystack <p>The string being checked.</p>
 * @param string   $needle   <p>The string being found.</p>
 * @param int      $offset   [optional] <p>The offset where to start counting.</p>
 * @param int|null $length   [optional] <p>
 *                           The maximum length after the specified offset to search for the
 *                           substring.
 *                           </p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The number of times the needle substring occurs in the haystack string;
 *                   0 if $haystack or $needle is empty.</p>
 */
public static function substr_count_in_byte(
    string $haystack,
    string $needle,
    int $offset = 0,
    int $length = null
) {
    if ($needle === '' || $haystack === '') {
        return 0;
    }

    // Regular case: the native function already works on bytes.
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }

    // From here on "mbstring.func_overload" is active.
    $has_window = $offset || $length !== null;
    if ($has_window) {
        if ($length === null) {
            $haystack_length = self::strlen($haystack);
            if ($haystack_length === false) {
                return false;
            }
            $length = $haystack_length;
        }

        $window_is_out_of_range = $length !== 0
                                  &&
                                  $offset !== 0
                                  &&
                                  ($length + $offset) <= 0;
        if (
            $window_is_out_of_range
            &&
            \PHP_VERSION_ID < 71000 // output from "substr_count()" have changed in PHP 7.1
        ) {
            return false;
        }

        /** @var false|string $haystack_part - needed for PhpStan (stubs error) */
        $haystack_part = \substr($haystack, $offset, $length);
        $haystack = $haystack_part === false ? '' : (string) $haystack_part;
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
}
11675
|
|
|
|
11676
|
|
|
/**
 * Returns the number of occurrences of $substring in the given string.
 *
 * The comparison is case-sensitive by default. With $case_sensitive set to
 * false both strings are converted to a common case before counting
 * (mb_strtoupper() for "UTF-8", self::strtocasefold() for other encodings).
 *
 * @param string $str            <p>The input string.</p>
 * @param string $substring      <p>The substring to search for.</p>
 * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <p>The number of occurrences; 0 if $str or $substring is empty.</p>
 */
public static function substr_count_simple(
    string $str,
    string $substring,
    bool $case_sensitive = true,
    string $encoding = 'UTF-8'
): int {
    if ($substring === '' || $str === '') {
        return 0;
    }

    if ($encoding === 'UTF-8') {
        if (!$case_sensitive) {
            $str = \mb_strtoupper($str);
            $substring = \mb_strtoupper($substring);
        }

        return (int) \mb_substr_count($str, $substring);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if (!$case_sensitive) {
        $str = self::strtocasefold($str, true, false, $encoding, null, false);
        $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
    }

    return (int) \mb_substr_count($str, $substring, $encoding);
}
11723
|
|
|
|
11724
|
|
|
/**
 * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
 *
 * EXAMPLE: <code>
 * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
 * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The haystack without the prefix, or the unchanged haystack if it
 *                does not start with $needle or if $needle is empty.</p>
 */
public static function substr_ileft(string $haystack, string $needle): string
{
    // An empty haystack or an empty needle leaves nothing to strip.
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (!self::str_istarts_with($haystack, $needle)) {
        return $haystack;
    }

    $needle_length = (int) self::strlen($needle);

    return (string) \mb_substr($haystack, $needle_length);
}
11756
|
|
|
|
11757
|
|
|
/**
 * Get part of a string, processed in bytes.
 *
 * With "mbstring.func_overload" active the portion is taken via mb_substr()
 * with the 8-bit "CP850" encoding, otherwise via the native substr().
 *
 * @param string   $str    <p>The string being checked.</p>
 * @param int      $offset <p>The first position used in str.</p>
 * @param int|null $length [optional] <p>The maximum length of the returned string. With null, everything
 *                         from $offset to the end of the string is returned.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
 *                      <i>length</i> parameters; an empty string if <i>str</i> is empty or
 *                      <i>length</i> is 0. Otherwise the result of the underlying
 *                      substr() / mb_substr() call is returned as is.</p>
 */
public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
{
    // empty string
    if ($str === '' || $length === 0) {
        return '';
    }

    // whole string
    if (!$offset && $length === null) {
        return $str;
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
        // "mb_" is available if overload is used, so use it ...
        return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
    }

    // Leave the length out instead of passing a fixed 32-bit maximum, so that
    // the rest of the string is returned regardless of its size.
    if ($length === null) {
        return \substr($str, $offset);
    }

    return \substr($str, $offset, $length);
}
11790
|
|
|
|
11791
|
|
|
/**
 * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
 *
 * EXAMPLE: <code>
 * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
 * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The haystack without the suffix, or the unchanged haystack if it
 *                does not end with $needle or if $needle is empty.</p>
 */
public static function substr_iright(string $haystack, string $needle): string
{
    // An empty haystack or an empty needle leaves nothing to strip.
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (!self::str_iends_with($haystack, $needle)) {
        return $haystack;
    }

    // Keep everything except the last strlen($needle) characters.
    $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

    return (string) \mb_substr($haystack, 0, $keep_length);
}
11823
|
|
|
|
11824
|
|
|
/**
 * Removes a prefix ($needle) from the beginning of the string ($haystack).
 *
 * EXAMPLE: <code>
 * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
 * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The haystack without the prefix, or the unchanged haystack if it
 *                does not start with $needle or if $needle is empty.</p>
 */
public static function substr_left(string $haystack, string $needle): string
{
    // An empty haystack or an empty needle leaves nothing to strip.
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (!self::str_starts_with($haystack, $needle)) {
        return $haystack;
    }

    $needle_length = (int) self::strlen($needle);

    return (string) \mb_substr($haystack, $needle_length);
}
11856
|
|
|
|
11857
|
|
|
/**
 * Replace text within a portion of a string.
 *
 * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
 *
 * source: https://gist.github.com/stemar/8287074
 *
 * If $str is an array, every element is processed separately (with the same
 * $encoding) and an array is returned. In that mode scalar $replacement,
 * $offset and $length values are applied to every element, array values are
 * matched by position, and a $length of null is treated as 0 for every
 * element (i.e. the replacement is inserted).
 *
 * @param string|string[] $str         <p>The input string or an array of strings.</p>
 * @param string|string[] $replacement <p>The replacement string or an array of strings. If $str is a string
 *                                     and this is an array, only the element with key 0 is used.</p>
 * @param int|int[]       $offset      <p>
 *                                     If start is positive, the replacing will begin at the start'th offset
 *                                     into string.
 *                                     <br><br>
 *                                     If start is negative, the replacing will begin at the start'th character
 *                                     from the end of string.
 *                                     </p>
 * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
 *                                     portion of string which is to be replaced. If it is negative, it
 *                                     represents the number of characters from the end of string at which to
 *                                     stop replacing. If it is not given (and $str is a string), the replacing
 *                                     ends at the end of string. If length is zero then this function will have
 *                                     the effect of inserting replacement into string at the given start
 *                                     offset.</p>
 * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
 *
 * @return string|string[]
 *                         <p>The result string is returned. If string is an array then array is returned.</p>
 *
 * @template TSubstrReplace
 * @phpstan-param TSubstrReplace $str
 * @phpstan-return TSubstrReplace
 */
public static function substr_replace(
    $str,
    $replacement,
    $offset,
    $length = null,
    string $encoding = 'UTF-8'
) {
    if (\is_array($str)) {
        $num = \count($str);

        // the replacement: one entry per input string
        if (\is_array($replacement)) {
            $replacement = \array_slice($replacement, 0, $num);
        } else {
            $replacement = \array_pad([$replacement], $num, $replacement);
        }

        // the offset: non-integer entries fall back to 0
        if (\is_array($offset)) {
            $offset = \array_slice($offset, 0, $num);
            foreach ($offset as &$value_tmp) {
                $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
            }
            unset($value_tmp);
        } else {
            $offset = \array_pad([$offset], $num, $offset);
        }

        // the length: non-integer entries fall back to the number of input strings
        if ($length === null) {
            $length = \array_fill(0, $num, 0);
        } elseif (\is_array($length)) {
            $length = \array_slice($length, 0, $num);
            foreach ($length as &$value_tmp_V2) {
                $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
            }
            unset($value_tmp_V2);
        } else {
            $length = \array_pad([$length], $num, $length);
        }

        // recursive call - a closure is used so that $encoding is passed on to every element
        /** @phpstan-ignore-next-line - phpstan currently can't handle recursive calls */
        return \array_map(
            static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
            },
            $str,
            $replacement,
            $offset,
            $length
        );
    }

    if (\is_array($replacement)) {
        if ($replacement !== []) {
            $replacement = $replacement[0];
        } else {
            $replacement = '';
        }
    }

    // init
    $str = (string) $str;
    $replacement = (string) $replacement;

    if (\is_array($length)) {
        throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
    }

    if (\is_array($offset)) {
        throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
    }

    if ($str === '') {
        return $replacement;
    }

    if (self::$SUPPORT['mbstring'] === true) {
        $string_length = (int) self::strlen($str, $encoding);

        // normalize the offset into the range 0..$string_length
        if ($offset < 0) {
            $offset = (int) \max(0, $string_length + $offset);
        } elseif ($offset > $string_length) {
            $offset = $string_length;
        }

        // normalize the length: negative values count from the end of the string
        if ($length !== null && $length < 0) {
            $length = (int) \max(0, $string_length - $offset + $length);
        } elseif ($length === null || $length > $string_length) {
            $length = $string_length;
        }

        if (($offset + $length) > $string_length) {
            $length = $string_length - $offset;
        }

        // part before the offset + replacement + part after the replaced range
        return ((string) \mb_substr($str, 0, $offset, $encoding)) .
               $replacement .
               ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($str)) {
        return ($length === null) ?
            \substr_replace($str, $replacement, $offset) :
            \substr_replace($str, $replacement, $offset, $length);
    }

    //
    // fallback via vanilla php
    //

    \preg_match_all('/./us', $str, $str_matches);
    \preg_match_all('/./us', $replacement, $replacement_matches);

    if ($length === null) {
        $length_tmp = self::strlen($str, $encoding);
        if ($length_tmp === false) {
            // e.g.: non mbstring support + invalid chars
            return '';
        }
        $length = $length_tmp;
    }

    \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

    return \implode('', $str_matches[0]);
}
12015
|
|
|
|
12016
|
|
|
/**
 * Removes a suffix ($needle) from the end of the string ($haystack).
 *
 * EXAMPLE: <code>
 * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
 * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The haystack without the suffix, or the unchanged haystack if it
 *                does not end with $needle or if $needle is empty.</p>
 */
public static function substr_right(
    string $haystack,
    string $needle,
    string $encoding = 'UTF-8'
): string {
    // An empty haystack or an empty needle leaves nothing to strip.
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    // The suffix check itself is done on the raw bytes.
    $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
    if (!$ends_with_needle) {
        return $haystack;
    }

    if ($encoding === 'UTF-8') {
        $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }

    $keep_length = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

    return (string) self::substr($haystack, 0, $keep_length, $encoding);
}
12065
|
|
|
|
12066
|
|
|
/**
 * Returns a case swapped version of the string.
 *
 * The result is built by XOR-ing the lower-cased string, the upper-cased
 * string and the original string.
 *
 * NOTE(review): the string XOR presumably relies on the lower- and upper-case
 * variants having the same byte length as the input - confirm the behavior
 * for characters whose case mapping changes the byte length.
 *
 * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
 *
 * @param string $str        <p>The input string.</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Each character's case swapped.</p>
 */
public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
{
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // remove invalid characters before the case conversion
        $str = self::clean($str);
    }

    if ($encoding === 'UTF-8') {
        $lower = \mb_strtolower($str);
        $upper = \mb_strtoupper($str);
    } else {
        $lower = self::strtolower($str, $encoding);
        $upper = self::strtoupper($str, $encoding);
    }

    return (string) ($lower ^ $upper ^ $str);
}
12098
|
|
|
|
12099
|
|
|
/**
 * Checks whether symfony-polyfills are used.
 *
 * A polyfill is assumed when "mb_strlen()" or "iconv()" exists although the
 * corresponding extension ("mbstring" / "iconv") is not loaded.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
 *
 * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
 */
public static function symfony_polyfill_used(): bool
{
    $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
    $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

    return $mbstring_is_polyfilled || $iconv_is_polyfilled;
}
12126
|
|
|
|
12127
|
|
|
/**
 * Replaces every tab character in the string with spaces.
 *
 * @param string $str        <p>The input string.</p>
 * @param int    $tab_length [optional] <p>The number of spaces that replace one tab. Default: 4</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with each tab replaced by $tab_length spaces.</p>
 */
public static function tabs_to_spaces(string $str, int $tab_length = 4): string
{
    // Build the replacement from $tab_length for every value instead of
    // relying on hand-written space literals for the common lengths.
    $spaces = \str_repeat(' ', $tab_length);

    return \str_replace("\t", $spaces, $str);
}
12147
|
|
|
|
12148
|
|
|
/**
 * Converts the first character of each word in the string to uppercase
 * and all other chars to lowercase.
 *
 * Without a language and without the "keep string length" option the
 * conversion is done by mb_convert_case() with MB_CASE_TITLE; otherwise the
 * call is forwarded to self::str_titleize().
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with all characters of $str being title-cased.</p>
 */
public static function titlecase(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($clean_utf8) {
        // remove invalid characters before the case conversion
        $str = self::clean($str);
    }

    // Language specific handling or length preservation needs the slower helper.
    $needs_titleize = $lang !== null || $try_to_keep_the_string_length;
    if ($needs_titleize) {
        return self::str_titleize(
            $str,
            null,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length,
            false
        );
    }

    if ($encoding === 'UTF-8') {
        return \mb_convert_case($str, \MB_CASE_TITLE);
    }

    $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

    return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
}
12202
|
|
|
|
12203
|
|
|
/**
 * Convert a string into ASCII.
 *
 * This is a thin wrapper around ASCII::to_transliterate().
 *
 * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
 *
 * @param string $str     <p>The input string.</p>
 * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
 * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
 *                        performance</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The result of ASCII::to_transliterate().</p>
 */
public static function to_ascii(
    string $str,
    string $unknown = '?',
    bool $strict = false
): string {
    $ascii = ASCII::to_transliterate($str, $unknown, $strict);

    return $ascii;
}
12224
|
|
|
|
12225
|
|
|
/**
 * Convert a scalar value into a boolean.
 *
 * The value is cast to a string first. The words "true", "1", "on", "yes"
 * (=> true) and "false", "0", "off", "no" (=> false) are recognized both
 * as given and lower-cased via strtolower(). Other numeric strings are true
 * if they are greater than zero. Everything else is true unless it is empty
 * after trim().
 *
 * @param bool|float|int|string $str
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function to_boolean($str): bool
{
    $value = (string) $str;

    if ($value === '') {
        return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $known_values = [
        'true'  => true,
        '1'     => true,
        'on'    => true,
        'yes'   => true,
        'false' => false,
        '0'     => false,
        'off'   => false,
        'no'    => false,
    ];

    // exact match first, then the lower-cased variant
    $mapped = $known_values[$value] ?? $known_values[\strtolower($value)] ?? null;
    if ($mapped !== null) {
        return $mapped;
    }

    if (\is_numeric($value)) {
        return ((float) $value) > 0;
    }

    return (bool) \trim($value);
}
12268
|
|
|
|
12269
|
|
|
/**
 * Convert the given string to a safe filename (the string case is kept).
 *
 * @param string $str
 * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
 *                                  simply replaced with hyphen.
 * @param string $fallback_char     Passed through to "ASCII::to_filename()" (default is "-").
 *
 * @psalm-pure
 *
 * @return string
 */
public static function to_filename(string $str, bool $use_transliterate = false, string $fallback_char = '-'): string
{
    // thin wrapper: the ASCII helper class does the actual work
    $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

    return $filename;
}
12292
|
|
|
|
12293
|
|
|
/**
 * Convert a string (or every element of an array) into "ISO-8859"-encoding (Latin-1).
 *
 * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
 *
 * @param string|string[] $str <p>Arrays are converted element by element (recursively), the keys are kept.</p>
 *
 * @psalm-pure
 *
 * @return string|string[]
 *
 * @template TToIso8859
 * @phpstan-param TToIso8859 $str
 * @phpstan-return TToIso8859
 */
public static function to_iso8859($str)
{
    if (!\is_array($str)) {
        $string = (string) $str;

        return $string === '' ? '' : self::utf8_decode($string);
    }

    foreach ($str as $key => $value) {
        $str[$key] = self::to_iso8859($value);
    }

    return $str;
}
12325
|
|
|
|
12326
|
|
|
/**
 * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
 *
 * <ul>
 * <li>A string is passed to "UTF8::to_utf8_string()", for an array every element is passed to it.</li>
 * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
 * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
 * case.</li>
 * </ul>
 *
 * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
 *
 * @param string|string[] $str                        <p>Any string or array of strings.</p>
 * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
 *
 * @psalm-pure
 *
 * @return string|string[]
 *                         <p>The UTF-8 encoded string</p>
 *
 * @template TToUtf8
 * @phpstan-param TToUtf8 $str
 * @phpstan-return TToUtf8
 */
public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
{
    if (!\is_array($str)) {
        /** @phpstan-var TToUtf8 $converted */
        $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

        return $converted;
    }

    // only one level is converted: every element goes straight into "to_utf8_string()"
    foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
    }

    /** @phpstan-var TToUtf8 $str */
    return $str;
}
12366
|
|
|
|
12367
|
|
|
/**
 * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
 *
 * <ul>
 * <li>Byte sequences that already look like UTF-8 (a lead byte followed by the right number of
 * continuation bytes) are copied, every other byte >= 0x80 is run through the convert-helper.</li>
 * <li>It decodes Unicode escape sequences ("\uXXXX", including surrogate pairs) and, on request, html-entities.</li>
 * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
 * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
 * case.</li>
 * </ul>
 *
 * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
 *
 * @param string $str                        <p>Any string.</p>
 * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The UTF-8 encoded string</p>
 */
public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
{
    if ($str === '') {
        return $str;
    }

    $length = \strlen($str);
    $buf = '';

    for ($i = 0; $i < $length; ++$i) {
        $lead = $str[$i];
        $lead_ord = \ord($lead);

        // 7-bit bytes never need a conversion
        if ($lead_ord < 0x80) {
            $buf .= $lead;

            continue;
        }

        // how many continuation bytes would a UTF-8 sequence with this lead byte need?
        if ($lead_ord >= 0xC0 && $lead_ord <= 0xDF) {
            $needed = 1; // looks like 2 bytes UTF8
        } elseif ($lead_ord >= 0xE0 && $lead_ord <= 0xEF) {
            $needed = 2; // looks like 3 bytes UTF8
        } elseif ($lead_ord >= 0xF0 && $lead_ord <= 0xF7) {
            $needed = 3; // looks like 4 bytes UTF8
        } else {
            $needed = 0; // stray continuation byte (0x80-0xBF) or 0xF8-0xFF: always converted
        }

        $sequence = $lead;
        $looks_like_utf8 = $needed > 0;
        for ($offset = 1; $looks_like_utf8 && $offset <= $needed; ++$offset) {
            // a missing byte at the end of the string counts as "\x00", which is no continuation byte
            $next = $i + $offset >= $length ? "\x00" : $str[$i + $offset];
            $next_ord = \ord($next);

            if ($next_ord >= 0x80 && $next_ord <= 0xBF) {
                $sequence .= $next;
            } else {
                $looks_like_utf8 = false;
            }
        }

        if ($looks_like_utf8) {
            // yeah, almost sure it's UTF8 already
            $buf .= $sequence;
            $i += $needed;
        } else {
            // not valid UTF8 - convert the lead byte only, the following bytes are checked on their own
            $buf .= self::to_utf8_convert_helper($lead);
        }
    }

    // decode unicode escape sequences + unicode surrogate pairs
    $buf = \preg_replace_callback(
        '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
        /**
         * @param array $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches): string {
            if (isset($matches[3])) {
                // single "\uXXXX" escape
                $code_point = (int) \hexdec($matches[3]);
            } else {
                // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                $high_surrogate = (int) \hexdec($matches[1]);
                $low_surrogate = (int) \hexdec($matches[2]);

                $code_point = ($high_surrogate << 10)
                              + $low_surrogate
                              + 0x10000
                              - (0xD800 << 10)
                              - 0xDC00;
            }

            // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
            //
            // php_utf32_utf8(unsigned char *buf, unsigned k)

            if ($code_point < 0x80) {
                return (string) self::chr($code_point);
            }

            if ($code_point < 0xA0) {
                // NOTE(review): this mirrors the byte-wise 2-byte encoding of "php_utf32_utf8()"; if "self::chr()"
                // maps a code point to a UTF-8 character (and not to a raw byte) the result is double-encoded - confirm
                /** @noinspection UnnecessaryCastingInspection */
                return (string) self::chr(0xC0 | ($code_point >> 6)) . (string) self::chr(0x80 | ($code_point & 0x3F));
            }

            return self::decimal_to_chr($code_point);
        },
        $buf
    );

    // "preg_replace_callback()" returns null on error
    if ($buf === null) {
        return '';
    }

    if ($decode_html_entity_to_utf8) {
        $buf = self::html_entity_decode($buf);
    }

    return $buf;
}
12498
|
|
|
|
12499
|
|
|
/**
 * Cast a numeric string to an integer.
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return int|null
 *                  <p>The "(int)" cast of the string, or null if "is_numeric()" rejects it.</p>
 */
public static function to_int(string $str)
{
    return \is_numeric($str) ? (int) $str : null;
}
12517
|
|
|
|
12518
|
|
|
/**
 * Returns the given input as string, or null if the input is neither int|float|string
 * nor an object that implements the "__toString()" method.
 *
 * @param float|int|object|string|null $input
 *
 * @psalm-pure
 *
 * @return string|null
 *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
 */
public static function to_string($input)
{
    // strings, integers and floats are simply cast (booleans and arrays are not converted)
    if (\is_string($input) || \is_int($input) || \is_float($input)) {
        return (string) $input;
    }

    if (\is_object($input) && \method_exists($input, '__toString')) {
        return (string) $input;
    }

    // null and every other type
    return null;
}
12557
|
|
|
|
12558
|
|
|
/**
 * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
 *
 * INFO: This is slower then "trim()"
 *
 * We can only use the original-function, if we use <= 7-Bit in the string / chars
 * but the check for ASCII (7-Bit) cost more time, then we can safe here.
 *
 * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
 *
 * @param string      $str   <p>The string to be trimmed</p>
 * @param string|null $chars [optional] <p>Optional characters to be stripped, null strips whitespace
 *                           and an empty string strips nothing.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The trimmed string.</p>
 */
public static function trim(string $str = '', string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    // an empty character list would put the malformed class "[]" into the pattern;
    // there is nothing to strip, so behave like the native "trim($str, '')"
    if ($chars === '') {
        return $str;
    }

    $use_mbstring = self::$SUPPORT['mbstring'] === true;

    if ($chars !== null) {
        if ($use_mbstring) {
            // "mb_ereg_replace()" takes the pattern without delimiters
            /** @noinspection PregQuoteUsageInspection */
            $chars = \preg_quote($chars);
        } else {
            // the fallback also escapes "/" (presumably the delimiter used by "regex_replace()" - verify there)
            $chars = \preg_quote($chars, '/');
        }

        // "{$chars}" instead of "${chars}": the latter interpolation syntax is deprecated as of PHP 8.2
        $pattern = "^[{$chars}]+|[{$chars}]+\$";
    } else {
        $pattern = '^[\\s]+|[\\s]+$';
    }

    if ($use_mbstring) {
        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    return self::regex_replace($str, $pattern, '');
}
12603
|
|
|
|
12604
|
|
|
/**
 * Makes string's first char uppercase.
 *
 * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The resulting string with the first char in uppercase.</p>
 */
public static function ucfirst(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    // the plain "mb_*" functions are enough as long as no language / length special case is requested
    $plain_mb_is_enough = $lang === null && !$try_to_keep_the_string_length;

    if ($encoding === 'UTF-8') {
        $tail = (string) \mb_substr($str, 1);
        $head = (string) \mb_substr($str, 0, 1);

        if ($plain_mb_is_enough) {
            $head = \mb_strtoupper($head);
        } else {
            $head = self::strtoupper($head, $encoding, false, $lang, $try_to_keep_the_string_length);
        }

        return $head . $tail;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    $tail = (string) self::substr($str, 1, null, $encoding);

    if ($plain_mb_is_enough) {
        $head = \mb_strtoupper(
            (string) \mb_substr($str, 0, 1, $encoding),
            $encoding
        );
    } else {
        $head = self::strtoupper(
            (string) self::substr($str, 0, 1, $encoding),
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );
    }

    return $head . $tail;
}
12680
|
|
|
|
12681
|
|
|
/**
 * Uppercase for all words in the string.
 *
 * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
 * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
 *                             word.</p>
 * @param string   $encoding   [optional] <p>Set the charset.</p>
 * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function ucwords(
    string $str,
    array $exceptions = [],
    string $char_list = '',
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
): string {
    // strict comparison on purpose: the string "0" is falsy in PHP but it is not an empty input
    if ($str === '') {
        return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    // the native function is only an option if neither extra word-chars nor (non-empty) exceptions are given;
    // compared with '' so that a "0" char list / exception is not mistaken for "nothing given"
    $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

    if (
        $use_php_default_functions
        &&
        ASCII::is_ascii($str)
    ) {
        return \ucwords($str);
    }

    $words = self::str_to_words($str, $char_list);
    $use_exceptions = $exceptions !== [];

    $words_str = '';
    foreach ($words as $word) {
        // skip empty chunks only, a "0" chunk has to stay in the result
        if ($word === '') {
            continue;
        }

        if ($use_exceptions && \in_array($word, $exceptions, true)) {
            // excluded words are copied unchanged
            $words_str .= $word;
        } else {
            $words_str .= self::ucfirst($word, $encoding);
        }
    }

    return $words_str;
}
12749
|
|
|
|
12750
|
|
|
/**
 * Multi decode HTML entity + fix urlencoded-win1252-chars.
 *
 * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
 *
 * e.g:
 * 'test+test'                     => 'test test'
 * 'D%FCsseldorf'                  => 'Düsseldorf'
 * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
 * 'D%C3%BCsseldorf'               => 'Düsseldorf'
 * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
 * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
 *
 * @param string $str          <p>The input string.</p>
 * @param bool   $multi_decode <p>Decode as often as possible.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function urldecode(string $str, bool $multi_decode = true): string
{
    if ($str === '') {
        return '';
    }

    $str = self::urldecode_unicode_helper($str);

    // one decode pass is always done; with $multi_decode the pass is repeated until the string is stable
    do {
        $before_pass = $str;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $str = \urldecode(
            self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            )
        );
    } while ($multi_decode && $before_pass !== $str);

    return self::fix_simple_utf8($str);
}
12809
|
|
|
|
12810
|
|
|
/**
 * Decodes a UTF-8 string to ISO-8859-1.
 *
 * Sequences that start with a 3- or 4-byte lead byte, and 2-byte sequences above code point 255,
 * are replaced with "?".
 *
 * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
 *
 * @param string $str             <p>The input string.</p>
 * @param bool   $keep_utf8_chars <p>true === return the unchanged input, if the decoded string does not
 *                                contain fewer characters (via "UTF8::strlen()") than the input.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
{
    if ($str === '') {
        return '';
    }

    // lazy-load the lookup tables
    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }

    // save for later comparison
    $original = $str;
    $byte_count = \strlen($str);
    $placeholder = '?';

    // the string is rewritten in place: $read walks the input bytes, $write marks the next output byte
    // ($write never overtakes $read, so no unread byte is overwritten)
    $read = 0;
    $write = 0;
    while ($read < $byte_count) {
        $high_nibble = $str[$read] & "\xF0";

        if ($high_nibble === "\xC0" || $high_nibble === "\xD0") {
            // 2-byte sequence: combine the payload bits of both bytes
            $c = (self::$ORD[$str[$read] & "\x1F"] << 6) | self::$ORD[$str[++$read] & "\x3F"];
            $str[$write] = $c < 256 ? self::$CHR[$c] : $placeholder;
        } elseif ($high_nibble === "\xE0" || $high_nibble === "\xF0") {
            // 3-byte (skip 2 more bytes) or 4-byte (skip 3 more bytes) sequence: not representable
            $str[$write] = $placeholder;
            $read += $high_nibble === "\xF0" ? 3 : 2;
        } else {
            $str[$write] = $str[$read];
        }

        ++$read;
        ++$write;
    }

    /** @var false|string $return - needed for PhpStan (stubs error) */
    $return = \substr($str, 0, $write);
    if ($return === false) {
        $return = '';
    }

    if (
        $keep_utf8_chars
        &&
        (int) self::strlen($return) >= (int) self::strlen($original)
    ) {
        return $original;
    }

    return $return;
}
12882
|
|
|
|
12883
|
|
|
/**
 * Encodes an ISO-8859-1 string to UTF-8.
 *
 * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The UTF-8 encoded string, or an empty string if the conversion fails.</p>
 */
public static function utf8_encode(string $str): string
{
    if ($str === '') {
        return '';
    }

    // "\utf8_encode()" is deprecated as of PHP 8.2, "mb_convert_encoding()" does the very same
    // ISO-8859-1 => UTF-8 conversion without the deprecation notice
    /** @var false|string $encoded - the polyfill maybe return false */
    $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

    if ($encoded === false) {
        return '';
    }

    return $encoded;
}
12909
|
|
|
|
12910
|
|
|
/**
 * Returns the class-internal table of utf8 whitespace characters.
 *
 * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
 *
 * @psalm-pure
 *
 * @return string[]
 *                  An array with all known whitespace characters as values and the type of whitespace as keys
 *                  as defined in above URL
 */
public static function whitespace_table(): array
{
    $table = self::$WHITESPACE_TABLE;

    return $table;
}
12925
|
|
|
|
12926
|
|
|
/**
 * Limit the number of words in a string.
 *
 * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
 *
 * @param string $str        <p>The input string.</p>
 * @param int    $limit      <p>The limit of words as integer, a limit below 1 results in an empty string.</p>
 * @param string $str_add_on <p>Appended to the result, if the string was shortened.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function words_limit(string $str, int $limit = 100, string $str_add_on = '…'): string
{
    if ($limit < 1 || $str === '') {
        return '';
    }

    // leading whitespace followed by 1..$limit runs of "non-whitespace + trailing whitespace"
    $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';

    $matches = [];
    \preg_match($pattern, $str, $matches);

    $head = $matches[0] ?? null;
    if ($head === null) {
        // nothing matched: the input is returned as it is
        return $str;
    }

    if (\mb_strlen($str) === (int) \mb_strlen($head)) {
        // the match covers the whole input: nothing was cut off
        return $str;
    }

    return \rtrim($head) . $str_add_on;
}
12960
|
|
|
|
12961
|
|
|
/**
 * Wraps a string to a given number of characters
 *
 * The wrapping itself is done by the native "wordwrap()" on a single-byte mask of the input
 * ("?" per character, " " per space, "#" per break); the break positions are then mapped back
 * onto the real (multi-byte) characters.
 *
 * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
 *
 * @see http://php.net/manual/en/function.wordwrap.php
 *
 * @param string $str   <p>The input string.</p>
 * @param int    $width [optional] <p>The column width.</p>
 * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
 * @param bool   $cut   [optional] <p>
 *                      If the cut is set to true, the string is
 *                      always wrapped at or before the specified width. So if you have
 *                      a word that is larger than the given width, it is broken apart.
 *                      </p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The given string wrapped at the specified column (an empty string, if $str or $break is empty).</p>
 */
public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
{
    if ($str === '' || $break === '') {
        return '';
    }

    /** @var string[] $chars */
    $chars = [];
    $mask = '';
    foreach (\explode($break, $str) as $segment_index => $segment) {
        if ($segment_index > 0) {
            // a break that is already part of the input: one list entry, one "#" in the mask
            $chars[] = $break;
            $mask .= '#';
        }

        foreach (self::str_split($segment) as $char) {
            $chars[] = $char;
            $mask .= $char === ' ' ? ' ' : '?';
        }
    }

    $mask = \wordwrap($mask, $width, '#', $cut);
    $mask_length = \mb_strlen($mask);

    $result = '';
    $char_index = 0;
    $mask_pos = -1;
    $break_pos = -1;
    /** @noinspection PhpAssignmentInConditionInspection - is ok here */
    while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
        // copy everything in front of the current break
        for (++$mask_pos; $mask_pos < $break_pos; ++$mask_pos) {
            if (isset($chars[$char_index])) {
                $result .= $chars[$char_index];
                unset($chars[$char_index]);
            }
            ++$char_index;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($mask_pos > $mask_length) {
                break 2;
            }
        }

        // the break replaces an existing break or a space, so that character is dropped
        if (
            $break === $chars[$char_index]
            ||
            $chars[$char_index] === ' '
        ) {
            unset($chars[$char_index++]);
        }

        $result .= $break;

        // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
        if ($break_pos > $mask_length) {
            break;
        }
    }

    // whatever is left behind the last break
    return $result . \implode('', $chars);
}
13053
|
|
|
|
13054
|
|
|
/**
 * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
 *    ... so that we wrap the per line.
 *
 * @param string      $str             <p>The input string.</p>
 * @param int         $width           [optional] <p>The column width.</p>
 * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
 * @param bool        $cut             [optional] <p>
 *                                     If the cut is set to true, the string is
 *                                     always wrapped at or before the specified width. So if you have
 *                                     a word that is larger than the given width, it is broken apart.
 *                                     </p>
 * @param bool        $add_final_break [optional] <p>
 *                                     If this flag is true, then the method will add a $break at the end
 *                                     of the result string.
 *                                     </p>
 * @param string|null $delimiter       [optional] <p>
 *                                     You can change the default behavior, where we split the string by newline.
 *                                     The wrapped parts are joined with this delimiter again ("\n" by default).
 *                                     </p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function wordwrap_per_line(
    string $str,
    int $width = 75,
    string $break = "\n",
    bool $cut = false,
    bool $add_final_break = true,
    string $delimiter = null
): string {
    $lines = $delimiter === null
        ? \preg_split('/\\r\\n|\\r|\\n/', $str)
        : \explode($delimiter, $str);

    $wrapped_lines = [];
    if ($lines !== false) {
        foreach ($lines as $line) {
            $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
        }
    }

    return \implode($delimiter ?? "\n", $wrapped_lines) . ($add_final_break ? $break : '');
}
13107
|
|
|
|
13108
|
|
|
/**
 * Returns the class-internal list of Unicode White Space characters.
 *
 * @psalm-pure
 *
 * @return string[]
 *                  <p>An array with numeric code point as key and White Space Character as value.</p>
 */
public static function ws(): array
{
    $whitespace = self::$WHITESPACE;

    return $whitespace;
}
13120
|
|
|
|
13121
|
|
|
/**
 * Checks whether the passed string consists solely of byte sequences that are valid UTF-8 characters.
 *
 * EXAMPLE: <code>
 * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
 * //
 * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
 * </code>
 *
 * @see http://hsivonen.iki.fi/php-utf8/
 *
 * @param string $str    <p>The string to be checked.</p>
 * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>True for the empty string and for valid UTF-8, false otherwise.</p>
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function is_utf8_string(string $str, bool $strict = false)
{
    // The empty string is treated as valid.
    if ($str === '') {
        return true;
    }

    // Strict mode: input that looks binary and is detected as UTF-16 or
    // UTF-32 is rejected before any UTF-8 validation happens.
    if ($strict) {
        $looks_binary = self::is_binary($str, true);

        if (
            $looks_binary
            &&
            (
                self::is_utf16($str, false) !== false
                ||
                self::is_utf32($str, false) !== false
            )
        ) {
            return false;
        }
    }

    if (self::$SUPPORT['pcre_utf8']) {
        // With the /u modifier PCRE validates the whole subject first: if
        // even the first character matches, the string is valid UTF-8; if
        // the UTF-8 is broken anywhere, nothing matches at all.
        return \preg_match('/^./us', $str) === 1;
    }

    // Fallback: hand-rolled UTF-8 decoder working on single octets.
    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    $pending = 0;    // continuation octets still expected for the current sequence
    $code_point = 0; // code point assembled so far
    $seq_length = 1; // total number of octets announced by the lead octet

    $byte_count = \strlen($str);
    for ($pos = 0; $pos < $byte_count; ++$pos) {
        $octet = self::$ORD[$str[$pos]];

        if ($pending !== 0) {
            // Inside a multi-octet sequence only 10xxxxxx octets are legal.
            if ((0xC0 & $octet) !== 0x80) {
                // Incomplete multi-octet sequence.
                return false;
            }

            $code_point |= ($octet & 0x0000003F) << (($pending - 1) * 6);

            if (--$pending !== 0) {
                continue;
            }

            // End of the sequence: $code_point now holds the decoded value.
            // From Unicode 3.1, non-shortest forms are illegal.
            $is_overlong = ($seq_length === 2 && $code_point < 0x0080)
                           ||
                           ($seq_length === 3 && $code_point < 0x0800)
                           ||
                           ($seq_length === 4 && $code_point < 0x10000);
            // 5 and 6 octet sequences are never legal.
            $is_too_long = $seq_length > 4;
            // From Unicode 3.2, surrogate characters are illegal.
            $is_surrogate = ($code_point & 0xFFFFF800) === 0xD800;
            // Code points outside the Unicode range are illegal.
            $is_out_of_range = $code_point > 0x10FFFF;

            if ($is_overlong || $is_too_long || $is_surrogate || $is_out_of_range) {
                return false;
            }

            // Reset the decoder for the next character.
            $pending = 0;
            $code_point = 0;
            $seq_length = 1;

            continue;
        }

        // No sequence in progress: expect US-ASCII or a lead octet.
        if ((0x80 & $octet) === 0) {
            // US-ASCII, pass straight through.
            $seq_length = 1;
        } elseif ((0xE0 & $octet) === 0xC0) {
            // First octet of a 2 octet sequence.
            $code_point = ($octet & 0x1F) << 6;
            $pending = 1;
            $seq_length = 2;
        } elseif ((0xF0 & $octet) === 0xE0) {
            // First octet of a 3 octet sequence.
            $code_point = ($octet & 0x0F) << 12;
            $pending = 2;
            $seq_length = 3;
        } elseif ((0xF8 & $octet) === 0xF0) {
            // First octet of a 4 octet sequence.
            $code_point = ($octet & 0x07) << 18;
            $pending = 3;
            $seq_length = 4;
        } elseif ((0xFC & $octet) === 0xF8) {
            // First octet of a 5 octet sequence. This is always illegal
            // (either not the shortest form or outside 0-0x10FFFF), but
            // instead of resynchronizing we consume the sequence and let the
            // end-of-sequence check above reject it.
            $code_point = ($octet & 0x03) << 24;
            $pending = 4;
            $seq_length = 5;
        } elseif ((0xFE & $octet) === 0xFC) {
            // First octet of a 6 octet sequence, handled like the 5 octet case.
            $code_point = ($octet & 1) << 30;
            $pending = 5;
            $seq_length = 6;
        } else {
            // Neither US-ASCII nor a legal first octet of a multi-octet sequence.
            return false;
        }
    }

    // A sequence that is still open at the end of the input is invalid.
    return $pending === 0;
}
13275
|
|
|
|
13276
|
|
|
/**
 * Swaps characters for their upper- or lower-case counterparts by plain
 * string replacement, using the common case-folding table and, on request,
 * the full case-folding table as well.
 *
 * @param string $str
 * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
 * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function fixStrCaseHelper(
    string $str,
    bool $use_lowercase = false,
    bool $use_full_case_fold = false
) {
    $upper_chars = self::$COMMON_CASE_FOLD['upper'];
    $lower_chars = self::$COMMON_CASE_FOLD['lower'];

    // Common cases first: the direction decides which list is searched for
    // and which one supplies the replacements.
    $str = $use_lowercase
        ? \str_replace($upper_chars, $lower_chars, $str)
        : \str_replace($lower_chars, $upper_chars, $str);

    if (!$use_full_case_fold) {
        return $str;
    }

    /**
     * Full case-folding table, loaded once per request on first use.
     *
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<mixed>|null
     */
    static $FULL_CASE_FOLD = null;
    if ($FULL_CASE_FOLD === null) {
        $FULL_CASE_FOLD = self::getData('caseFolding_full');
    }

    // Lower-casing replaces the entries of list 0 by those of list 1,
    // upper-casing goes the other way round.
    if ($use_lowercase) {
        return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
    }

    return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
}
13329
|
|
|
|
13330
|
|
|
/**
 * Loads a data table from "/data/*.php" next to this file.
 *
 * @param string $file <p>File name without directory and without the ".php" extension.</p>
 *
 * @psalm-pure
 *
 * @return array
 *               <p>Whatever the included data file returns.</p>
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function getData(string $file)
{
    $data_file = __DIR__ . '/data/' . $file . '.php';

    /** @noinspection PhpIncludeInspection */
    /** @noinspection UsingInclusionReturnValueInspection */
    /** @psalm-suppress UnresolvableInclude */
    return include $data_file;
}
13348
|
|
|
|
13349
|
|
|
/**
 * Builds the emoji lookup caches on first use.
 *
 * @psalm-pure
 *
 * @return true|null
 *                   <p>True when the caches were built by this call, null when they already existed.</p>
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function initEmojiData()
{
    // Caches already populated: nothing to do.
    if (self::$EMOJI_KEYS_CACHE !== null) {
        return null;
    }

    if (self::$EMOJI === null) {
        self::$EMOJI = self::getData('emoji');
    }

    /**
     * Order the table by key length, longest keys first.
     *
     * @psalm-suppress ImpureFunctionCall - static sort function is used
     */
    \uksort(
        self::$EMOJI,
        static function (string $a, string $b): int {
            return \strlen($b) <=> \strlen($a);
        }
    );

    self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
    self::$EMOJI_VALUES_CACHE = self::$EMOJI;

    // One placeholder per key, built from the key's CRC32 checksum and the
    // reversed checksum between fixed markers.
    foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
        $checksum = (string) \crc32($emoji_key);
        self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
    }

    return true;
}
13386
|
|
|
|
13387
|
|
|
/**
 * Checks whether mbstring function overloading for string functions is active on the server.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>False when the MB_OVERLOAD_STRING constant does not exist or its bit is not set in
 *              the "mbstring.func_overload" INI value.</p>
 */
private static function mbstring_overloaded(): bool
{
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    // Without the constant there is nothing to test the INI value against.
    if (!\defined('MB_OVERLOAD_STRING')) {
        return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    /** @noinspection DeprecatedIniOptionsInspection */
    $func_overload = (int) @\ini_get('mbstring.func_overload');

    return (bool) ($func_overload & \MB_OVERLOAD_STRING);
}
13407
|
|
|
|
13408
|
|
|
/**
 * Filters a list of strings and returns the survivors as a re-indexed list.
 *
 * @param array    $strings             <p>The strings to filter.</p>
 * @param bool     $remove_empty_values <p>Drop entries that are empty after trim().</p>
 * @param int|null $remove_short_values <p>Drop entries whose mb_strlen() is less than or equal to this
 *                                      value; null disables the length filter.</p>
 *
 * @psalm-pure
 *
 * @return array
 *               <p>The remaining strings, in their original order, with fresh numeric keys.</p>
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function reduce_string_array(
    array $strings,
    bool $remove_empty_values,
    int $remove_short_values = null
) {
    // init
    $return = [];

    // Iterate by value: the elements are only read, so a by-reference loop
    // would merely leave a dangling reference and force array separation.
    foreach ($strings as $str) {
        // The length filter is inclusive: a string of exactly
        // $remove_short_values characters is dropped as well.
        if (
            $remove_short_values !== null
            &&
            \mb_strlen($str) <= $remove_short_values
        ) {
            continue;
        }

        // Whitespace-only strings count as empty.
        if (
            $remove_empty_values
            &&
            \trim($str) === ''
        ) {
            continue;
        }

        $return[] = $str;
    }

    return $return;
}
13449
|
|
|
|
13450
|
|
|
/**
 * Builds a regular-expression fragment that matches any of the given characters.
 *
 * Results are memoised per ($s, $class) combination.
 *
 * @param string $s     <p>The characters to match.</p>
 * @param string $class <p>Text that is placed at the start of the generated character class.</p>
 *
 * @return string
 *                <p>Either a single bracketed character class or a non-capturing alternation.</p>
 *
 * @psalm-pure
 */
private static function rxClass(string $s, string $class = '')
{
    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<string,string>
     */
    static $RX_CLASS_CACHE = [];

    $cache_key = $s . '_' . $class;

    if (isset($RX_CLASS_CACHE[$cache_key])) {
        return $RX_CLASS_CACHE[$cache_key];
    }

    // Slot 0 collects the content of the bracketed class; any further slots
    // become separate alternation branches.
    $alternatives = [$class];

    foreach (self::str_split($s) as $char) {
        if ($char === '-') {
            // A hyphen goes to the very front of the class content.
            $alternatives[0] = '-' . $alternatives[0];
        } elseif (!isset($char[2])) {
            // At most two bytes long: escape for use inside a "/" delimited pattern.
            $alternatives[0] .= \preg_quote($char, '/');
        } elseif (self::strlen($char) === 1) {
            // Longer in bytes but still one character: append unescaped.
            $alternatives[0] .= $char;
        } else {
            // More than one character: keep as its own alternative.
            $alternatives[] = $char;
        }
    }

    if ($alternatives[0]) {
        $alternatives[0] = '[' . $alternatives[0] . ']';
    }

    $regex = \count($alternatives) === 1
        ? $alternatives[0]
        : '(?:' . \implode('|', $alternatives) . ')';

    $RX_CLASS_CACHE[$cache_key] = $regex;

    return $regex;
}
13505
|
|
|
|
13506
|
|
|
/**
 * Upper-cases the first character of every part of a personal name, e.g.
 * "marcus aurelius" => "Marcus Aurelius", leaving known particles and
 * prefixed parts untouched.
 *
 * @param string $names     <p>The name(s) to process.</p>
 * @param string $delimiter <p>The separator the name is split at and re-joined with.</p>
 * @param string $encoding  <p>Passed through to ucfirst().</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The re-joined name, or an empty string when splitting fails.</p>
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function str_capitalize_name_helper(
    string $names,
    string $delimiter,
    string $encoding = 'UTF-8'
) {
    $parts = \explode($delimiter, $names);
    if ($parts === false) {
        return '';
    }

    // Words that stay as they are when a part equals one of them exactly.
    $untouched_words = [
        'ab',
        'af',
        'al',
        'and',
        'ap',
        'bint',
        'binte',
        'da',
        'de',
        'del',
        'den',
        'der',
        'di',
        'dit',
        'ibn',
        'la',
        'mac',
        'nic',
        'of',
        'ter',
        'the',
        'und',
        'van',
        'von',
        'y',
        'zu',
    ];

    // Parts beginning with one of these are left untouched as well.
    $untouched_prefixes = [
        'al-',
        "d'",
        'ff',
        "l'",
        'mac',
        'mc',
        'nic',
    ];

    // Byte-wise "starts with any of" check.
    $starts_with_any = static function (string $candidate, array $beginnings): bool {
        foreach ($beginnings as $beginning) {
            if (\strncmp($candidate, $beginning, \strlen($beginning)) === 0) {
                return true;
            }
        }

        return false;
    };

    foreach ($parts as $index => $part) {
        if (\in_array($part, $untouched_words, true)) {
            continue;
        }

        // When splitting at hyphens, the word list is additionally applied
        // as a prefix list.
        if ($delimiter === '-' && $starts_with_any($part, $untouched_words)) {
            continue;
        }

        if ($starts_with_any($part, $untouched_prefixes)) {
            continue;
        }

        $parts[$index] = self::ucfirst($part, $encoding);
    }

    return \implode($delimiter, $parts);
}
13606
|
|
|
|
13607
|
|
|
/**
 * Generic case-sensitive transformation for collation matching: the string
 * is decomposed (NFD) and all runs of nonspacing marks (\p{Mn}) are removed.
 *
 * @param string $str <p>The input string</p>
 *
 * @psalm-pure
 *
 * @return string|null
 *                     <p>The stripped string, an empty string when normalization fails, or whatever
 *                     preg_replace() yields on error.</p>
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function strtonatfold(string $str)
{
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // normalize() signals failure with false.
    if ($decomposed === false) {
        return '';
    }

    $without_marks = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    return $without_marks;
}
13631
|
|
|
|
13632
|
|
|
/**
 * Converts a single input unit to UTF-8: via the Windows-1252 special-case
 * table when it has an entry, otherwise by assembling a two-byte sequence.
 *
 * @param int|string $input <p>The unit to convert; it is used as key into the "ord" table.</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function to_utf8_convert_helper($input)
{
    // Lazy-load the lookup tables on first use.
    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    // NOTE(review): presumably the "ord" table maps a one-byte string to its
    // integer byte value - confirm against data/ord.php.
    $ordC1 = self::$ORD[$input];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
        return (string) self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Lead byte: the table entry for ($ordC1 / 64) OR-ed with 0xC0.
    // The quotient is truncated explicitly, because using a fractional float
    // as an array key is deprecated since PHP 8.1; the resulting key is the
    // same one the implicit conversion produced.
    /** @noinspection OffsetOperationsInspection */
    $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";

    // Trail byte: the low six bits of the input OR-ed with 0x80.
    $cc2 = ((string) $input & "\x3F") | "\x80";

    return $cc1 . $cc2;
}
13670
|
|
|
|
13671
|
|
|
/**
 * Rewrites "%uXXXX" escapes (three or four hex digits) into HTML numeric
 * character references of the form "&#xXXXX;".
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The input with all matching escapes rewritten; unchanged when there are none.</p>
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function urldecode_unicode_helper(string $str)
{
    $pattern = '/%u([0-9a-fA-F]{3,4})/';

    // Cheap substring test first, so the regex only runs on candidates.
    $has_escape = \strpos($str, '%u') !== false
                  &&
                  \preg_match($pattern, $str);

    if (!$has_escape) {
        return $str;
    }

    return (string) \preg_replace($pattern, '&#x\\1;', $str);
}
13693
|
|
|
} |
13694
|
|
|
|