|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
namespace voku\helper; |
|
6
|
|
|
|
|
7
|
|
|
/** |
|
8
|
|
|
* @psalm-immutable |
|
9
|
|
|
*/ |
|
10
|
|
|
final class UTF8 |
|
11
|
|
|
{ |
|
12
|
|
|
/** |
|
13
|
|
|
* Bom => Byte-Length |
|
14
|
|
|
* |
|
15
|
|
|
* INFO: https://en.wikipedia.org/wiki/Byte_order_mark |
|
16
|
|
|
* |
|
17
|
|
|
* @var array<string, int> |
|
18
|
|
|
*/ |
|
19
|
|
|
private static $BOM = [ |
|
20
|
|
|
"\xef\xbb\xbf" => 3, // UTF-8 BOM |
|
21
|
|
|
'' => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...) |
|
22
|
|
|
"\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM |
|
23
|
|
|
' þÿ' => 6, // UTF-32 (BE) BOM as "WINDOWS-1252" |
|
24
|
|
|
"\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM |
|
25
|
|
|
'ÿþ ' => 6, // UTF-32 (LE) BOM as "WINDOWS-1252" |
|
26
|
|
|
"\xfe\xff" => 2, // UTF-16 (BE) BOM |
|
27
|
|
|
'þÿ' => 4, // UTF-16 (BE) BOM as "WINDOWS-1252" |
|
28
|
|
|
"\xff\xfe" => 2, // UTF-16 (LE) BOM |
|
29
|
|
|
'ÿþ' => 4, // UTF-16 (LE) BOM as "WINDOWS-1252" |
|
30
|
|
|
]; |
|
31
|
|
|
|
|
32
|
|
|
/** |
|
33
|
|
|
* Numeric code point => UTF-8 Character |
|
34
|
|
|
* |
|
35
|
|
|
* url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp |
|
36
|
|
|
* |
|
37
|
|
|
* @var array<int, string> |
|
38
|
|
|
*/ |
|
39
|
|
|
private static $WHITESPACE = [ |
|
40
|
|
|
// NULL Byte |
|
41
|
|
|
0 => "\x0", |
|
42
|
|
|
// Tab |
|
43
|
|
|
9 => "\x9", |
|
44
|
|
|
// New Line |
|
45
|
|
|
10 => "\xa", |
|
46
|
|
|
// Vertical Tab |
|
47
|
|
|
11 => "\xb", |
|
48
|
|
|
// Carriage Return |
|
49
|
|
|
13 => "\xd", |
|
50
|
|
|
// Ordinary Space |
|
51
|
|
|
32 => "\x20", |
|
52
|
|
|
// NO-BREAK SPACE |
|
53
|
|
|
160 => "\xc2\xa0", |
|
54
|
|
|
// OGHAM SPACE MARK |
|
55
|
|
|
5760 => "\xe1\x9a\x80", |
|
56
|
|
|
// MONGOLIAN VOWEL SEPARATOR |
|
57
|
|
|
6158 => "\xe1\xa0\x8e", |
|
58
|
|
|
// EN QUAD |
|
59
|
|
|
8192 => "\xe2\x80\x80", |
|
60
|
|
|
// EM QUAD |
|
61
|
|
|
8193 => "\xe2\x80\x81", |
|
62
|
|
|
// EN SPACE |
|
63
|
|
|
8194 => "\xe2\x80\x82", |
|
64
|
|
|
// EM SPACE |
|
65
|
|
|
8195 => "\xe2\x80\x83", |
|
66
|
|
|
// THREE-PER-EM SPACE |
|
67
|
|
|
8196 => "\xe2\x80\x84", |
|
68
|
|
|
// FOUR-PER-EM SPACE |
|
69
|
|
|
8197 => "\xe2\x80\x85", |
|
70
|
|
|
// SIX-PER-EM SPACE |
|
71
|
|
|
8198 => "\xe2\x80\x86", |
|
72
|
|
|
// FIGURE SPACE |
|
73
|
|
|
8199 => "\xe2\x80\x87", |
|
74
|
|
|
// PUNCTUATION SPACE |
|
75
|
|
|
8200 => "\xe2\x80\x88", |
|
76
|
|
|
// THIN SPACE |
|
77
|
|
|
8201 => "\xe2\x80\x89", |
|
78
|
|
|
// HAIR SPACE |
|
79
|
|
|
8202 => "\xe2\x80\x8a", |
|
80
|
|
|
// LINE SEPARATOR |
|
81
|
|
|
8232 => "\xe2\x80\xa8", |
|
82
|
|
|
// PARAGRAPH SEPARATOR |
|
83
|
|
|
8233 => "\xe2\x80\xa9", |
|
84
|
|
|
// NARROW NO-BREAK SPACE |
|
85
|
|
|
8239 => "\xe2\x80\xaf", |
|
86
|
|
|
// MEDIUM MATHEMATICAL SPACE |
|
87
|
|
|
8287 => "\xe2\x81\x9f", |
|
88
|
|
|
// HALFWIDTH HANGUL FILLER |
|
89
|
|
|
65440 => "\xef\xbe\xa0", |
|
90
|
|
|
// IDEOGRAPHIC SPACE |
|
91
|
|
|
12288 => "\xe3\x80\x80", |
|
92
|
|
|
]; |
|
93
|
|
|
|
|
94
|
|
|
/** |
|
95
|
|
|
* @var array<string, string> |
|
96
|
|
|
*/ |
|
97
|
|
|
private static $WHITESPACE_TABLE = [ |
|
98
|
|
|
'SPACE' => "\x20", |
|
99
|
|
|
'NO-BREAK SPACE' => "\xc2\xa0", |
|
100
|
|
|
'OGHAM SPACE MARK' => "\xe1\x9a\x80", |
|
101
|
|
|
'EN QUAD' => "\xe2\x80\x80", |
|
102
|
|
|
'EM QUAD' => "\xe2\x80\x81", |
|
103
|
|
|
'EN SPACE' => "\xe2\x80\x82", |
|
104
|
|
|
'EM SPACE' => "\xe2\x80\x83", |
|
105
|
|
|
'THREE-PER-EM SPACE' => "\xe2\x80\x84", |
|
106
|
|
|
'FOUR-PER-EM SPACE' => "\xe2\x80\x85", |
|
107
|
|
|
'SIX-PER-EM SPACE' => "\xe2\x80\x86", |
|
108
|
|
|
'FIGURE SPACE' => "\xe2\x80\x87", |
|
109
|
|
|
'PUNCTUATION SPACE' => "\xe2\x80\x88", |
|
110
|
|
|
'THIN SPACE' => "\xe2\x80\x89", |
|
111
|
|
|
'HAIR SPACE' => "\xe2\x80\x8a", |
|
112
|
|
|
'LINE SEPARATOR' => "\xe2\x80\xa8", |
|
113
|
|
|
'PARAGRAPH SEPARATOR' => "\xe2\x80\xa9", |
|
114
|
|
|
'ZERO WIDTH SPACE' => "\xe2\x80\x8b", |
|
115
|
|
|
'NARROW NO-BREAK SPACE' => "\xe2\x80\xaf", |
|
116
|
|
|
'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f", |
|
117
|
|
|
'IDEOGRAPHIC SPACE' => "\xe3\x80\x80", |
|
118
|
|
|
'HALFWIDTH HANGUL FILLER' => "\xef\xbe\xa0", |
|
119
|
|
|
]; |
|
120
|
|
|
|
|
121
|
|
|
/** |
|
122
|
|
|
* @var array |
|
123
|
|
|
* |
|
124
|
|
|
* @phpstan-var array{upper: string[], lower: string[]} |
|
125
|
|
|
*/ |
|
126
|
|
|
private static $COMMON_CASE_FOLD = [ |
|
127
|
|
|
'upper' => [ |
|
128
|
|
|
'µ', |
|
129
|
|
|
'ſ', |
|
130
|
|
|
"\xCD\x85", |
|
131
|
|
|
'ς', |
|
132
|
|
|
'ẞ', |
|
133
|
|
|
"\xCF\x90", |
|
134
|
|
|
"\xCF\x91", |
|
135
|
|
|
"\xCF\x95", |
|
136
|
|
|
"\xCF\x96", |
|
137
|
|
|
"\xCF\xB0", |
|
138
|
|
|
"\xCF\xB1", |
|
139
|
|
|
"\xCF\xB5", |
|
140
|
|
|
"\xE1\xBA\x9B", |
|
141
|
|
|
"\xE1\xBE\xBE", |
|
142
|
|
|
], |
|
143
|
|
|
'lower' => [ |
|
144
|
|
|
'μ', |
|
145
|
|
|
's', |
|
146
|
|
|
'ι', |
|
147
|
|
|
'σ', |
|
148
|
|
|
'ß', |
|
149
|
|
|
'β', |
|
150
|
|
|
'θ', |
|
151
|
|
|
'φ', |
|
152
|
|
|
'π', |
|
153
|
|
|
'κ', |
|
154
|
|
|
'ρ', |
|
155
|
|
|
'ε', |
|
156
|
|
|
"\xE1\xB9\xA1", |
|
157
|
|
|
'ι', |
|
158
|
|
|
], |
|
159
|
|
|
]; |
|
160
|
|
|
|
|
161
|
|
|
/** |
|
162
|
|
|
* @var array |
|
163
|
|
|
* |
|
164
|
|
|
* @phpstan-var array<string, mixed> |
|
165
|
|
|
*/ |
|
166
|
|
|
private static $SUPPORT = []; |
|
167
|
|
|
|
|
168
|
|
|
/** |
|
169
|
|
|
* @var string[]|null |
|
170
|
|
|
* |
|
171
|
|
|
* @phpstan-var array<string, string>|null |
|
172
|
|
|
*/ |
|
173
|
|
|
private static $BROKEN_UTF8_FIX; |
|
174
|
|
|
|
|
175
|
|
|
/** |
|
176
|
|
|
* @var string[]|null |
|
177
|
|
|
* |
|
178
|
|
|
* @phpstan-var array<int, string>|null |
|
179
|
|
|
*/ |
|
180
|
|
|
private static $WIN1252_TO_UTF8; |
|
181
|
|
|
|
|
182
|
|
|
/** |
|
183
|
|
|
* @var string[]|null |
|
184
|
|
|
* |
|
185
|
|
|
* @phpstan-var array<int ,string>|null |
|
186
|
|
|
*/ |
|
187
|
|
|
private static $INTL_TRANSLITERATOR_LIST; |
|
188
|
|
|
|
|
189
|
|
|
/** |
|
190
|
|
|
* @var string[]|null |
|
191
|
|
|
* |
|
192
|
|
|
* @phpstan-var array<string>|null |
|
193
|
|
|
*/ |
|
194
|
|
|
private static $ENCODINGS; |
|
195
|
|
|
|
|
196
|
|
|
/** |
|
197
|
|
|
* @var int[]|null |
|
198
|
|
|
* |
|
199
|
|
|
* @phpstan-var array<string ,int>|null |
|
200
|
|
|
*/ |
|
201
|
|
|
private static $ORD; |
|
202
|
|
|
|
|
203
|
|
|
/** |
|
204
|
|
|
* @var string[]|null |
|
205
|
|
|
* |
|
206
|
|
|
* @phpstan-var array<string, string>|null |
|
207
|
|
|
*/ |
|
208
|
|
|
private static $EMOJI; |
|
209
|
|
|
|
|
210
|
|
|
/** |
|
211
|
|
|
* @var string[]|null |
|
212
|
|
|
* |
|
213
|
|
|
* @phpstan-var array<string>|null |
|
214
|
|
|
*/ |
|
215
|
|
|
private static $EMOJI_VALUES_CACHE; |
|
216
|
|
|
|
|
217
|
|
|
/** |
|
218
|
|
|
* @var string[]|null |
|
219
|
|
|
* |
|
220
|
|
|
* @phpstan-var array<string>|null |
|
221
|
|
|
*/ |
|
222
|
|
|
private static $EMOJI_KEYS_CACHE; |
|
223
|
|
|
|
|
224
|
|
|
/** |
|
225
|
|
|
* @var string[]|null |
|
226
|
|
|
* |
|
227
|
|
|
* @phpstan-var array<string>|null |
|
228
|
|
|
*/ |
|
229
|
|
|
private static $EMOJI_KEYS_REVERSIBLE_CACHE; |
|
230
|
|
|
|
|
231
|
|
|
/** |
|
232
|
|
|
* @var string[]|null |
|
233
|
|
|
* |
|
234
|
|
|
* @phpstan-var array<int, string>|null |
|
235
|
|
|
*/ |
|
236
|
|
|
private static $CHR; |
|
237
|
|
|
|
|
238
|
|
|
/** |
|
239
|
|
|
* __construct() |
|
240
|
|
|
*/ |
|
241
|
34 |
|
public function __construct()
{
    // Intentionally empty: there is nothing to initialise here.
}
|
244
|
|
|
|
|
245
|
|
|
/** |
|
246
|
|
|
* Return the character at the specified position: $str[1] like functionality. |
|
247
|
|
|
* |
|
248
|
|
|
* EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code> |
|
249
|
|
|
* |
|
250
|
|
|
* @param string $str <p>A UTF-8 string.</p> |
|
251
|
|
|
* @param int $pos <p>The position of character to return.</p> |
|
252
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
|
253
|
|
|
* |
|
254
|
|
|
* @psalm-pure |
|
255
|
|
|
* |
|
256
|
|
|
* @return string |
|
257
|
|
|
* <p>Single multi-byte character.</p> |
|
258
|
|
|
*/ |
|
259
|
3 |
|
public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
{
    // An empty string has no characters, and negative positions are
    // rejected here instead of being counted from the end.
    if ($pos < 0 || $str === '') {
        return '';
    }

    // Default encoding: go straight to mbstring; anything else is
    // delegated to the encoding-aware substr() of this class.
    $char = $encoding === 'UTF-8'
        ? \mb_substr($str, $pos, 1)
        : self::substr($str, $pos, 1, $encoding);

    return (string) $char;
}
|
271
|
|
|
|
|
272
|
|
|
/** |
|
273
|
|
|
* Prepends UTF-8 BOM character to the string and returns the whole string. |
|
274
|
|
|
* |
|
275
|
|
|
* INFO: If BOM already existed there, the Input string is returned. |
|
276
|
|
|
* |
|
277
|
|
|
* EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code> |
|
278
|
|
|
* |
|
279
|
|
|
* @param string $str <p>The input string.</p> |
|
280
|
|
|
* |
|
281
|
|
|
* @psalm-pure |
|
282
|
|
|
* |
|
283
|
|
|
* @return string |
|
284
|
|
|
* <p>The output string that contains BOM.</p> |
|
285
|
|
|
*/ |
|
286
|
2 |
|
public static function add_bom_to_string(string $str): string
{
    // Input for which string_has_bom() reports a BOM is returned unchanged.
    if (self::string_has_bom($str)) {
        return $str;
    }

    return self::bom() . $str;
}
|
294
|
|
|
|
|
295
|
|
|
/** |
|
296
|
|
|
* Changes all keys in an array. |
|
297
|
|
|
* |
|
298
|
|
|
* @param array<string, mixed> $array <p>The array to work on</p> |
|
299
|
|
|
* @param int $case [optional] <p> Either <strong>CASE_UPPER</strong><br> |
|
300
|
|
|
* or <strong>CASE_LOWER</strong> (default)</p> |
|
301
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
|
302
|
|
|
* |
|
303
|
|
|
* @psalm-pure |
|
304
|
|
|
* |
|
305
|
|
|
* @return array<string, mixed> |
|
306
|
|
|
* <p>An array with its keys lower- or uppercased.</p> |
|
307
|
|
|
*/ |
|
308
|
2 |
|
public static function array_change_key_case(
    array $array,
    int $case = \CASE_LOWER,
    string $encoding = 'UTF-8'
): array {
    // Anything that is not CASE_UPPER (including unknown values) is
    // treated as CASE_LOWER.
    $to_upper = $case === \CASE_UPPER;

    $return = [];
    // Iterate by value: the values are only copied, so a reference is not
    // needed and would otherwise stay bound after the loop.
    foreach ($array as $key => $value) {
        // PHP stores numeric-string keys such as '10' as integers, so the
        // key is made a string again before it reaches the case helpers.
        $key = (string) $key;

        $key = $to_upper
            ? self::strtoupper($key, $encoding)
            : self::strtolower($key, $encoding);

        // Keys that become equal after case conversion overwrite each
        // other; the last one wins.
        $return[$key] = $value;
    }

    return $return;
}
|
332
|
|
|
|
|
333
|
|
|
/** |
|
334
|
|
|
* Returns the substring between $start and $end, if found, or an empty |
|
335
|
|
|
* string. An optional offset may be supplied from which to begin the |
|
336
|
|
|
* search for the start string. |
|
337
|
|
|
* |
|
338
|
|
|
* @param string $str |
|
339
|
|
|
* @param string $start <p>Delimiter marking the start of the substring.</p> |
|
340
|
|
|
* @param string $end <p>Delimiter marking the end of the substring.</p> |
|
341
|
|
|
* @param int $offset [optional] <p>Index from which to begin the search. Default: 0</p> |
|
342
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
|
343
|
|
|
* |
|
344
|
|
|
* @psalm-pure |
|
345
|
|
|
* |
|
346
|
|
|
* @return string |
|
347
|
|
|
*/ |
|
348
|
16 |
|
public static function between(
    string $str,
    string $start,
    string $end,
    int $offset = 0,
    string $encoding = 'UTF-8'
): string {
    if ($encoding === 'UTF-8') {
        // Default encoding: use mbstring directly.
        $start_at = \mb_strpos($str, $start, $offset);
        if ($start_at === false) {
            return '';
        }

        // First character after the start delimiter.
        $content_from = $start_at + (int) \mb_strlen($start);
        $end_at = \mb_strpos($str, $end, $content_from);

        // No end delimiter, or nothing between the two delimiters.
        if ($end_at === false || $end_at === $content_from) {
            return '';
        }

        return (string) \mb_substr($str, $content_from, $end_at - $content_from);
    }

    // Any other encoding: same steps through the encoding-aware helpers.
    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    $start_at = self::strpos($str, $start, $offset, $encoding);
    if ($start_at === false) {
        return '';
    }

    $content_from = $start_at + (int) self::strlen($start, $encoding);
    $end_at = self::strpos($str, $end, $content_from, $encoding);

    if ($end_at === false || $end_at === $content_from) {
        return '';
    }

    return (string) self::substr($str, $content_from, $end_at - $content_from, $encoding);
}
|
398
|
|
|
|
|
399
|
|
|
/** |
|
400
|
|
|
* Convert binary into a string. |
|
401
|
|
|
* |
|
402
|
|
|
* INFO: opposite to UTF8::str_to_binary() |
|
403
|
|
|
* |
|
404
|
|
|
* EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code> |
|
405
|
|
|
* |
|
406
|
|
|
* @param string $bin 1|0 |
|
407
|
|
|
* |
|
408
|
|
|
* @psalm-pure |
|
409
|
|
|
* |
|
410
|
|
|
* @return string |
|
411
|
|
|
*/ |
|
412
|
2 |
|
public static function binary_to_str($bin): string
{
    // Non-string and empty input yield an empty result.
    if (!\is_string($bin) || $bin === '') {
        return '';
    }

    // base_convert() ignored every character that is not a valid digit of
    // the source base; keep that tolerance explicitly.
    $bits = (string) \preg_replace('/[^01]/', '', $bin);

    // Leading zeros carry no value; an all-zero input has always produced
    // an empty string.
    $bits = \ltrim($bits, '0');
    if ($bits === '') {
        return '';
    }

    // Left-pad to whole bytes. Converting the complete number through
    // base_convert() loses precision on long inputs, and an odd number of
    // hex digits made pack('H*') pad on the wrong (right) side.
    $missing = (8 - \strlen($bits) % 8) % 8;
    $bits = \str_repeat('0', $missing) . $bits;

    // Convert byte by byte so the input length is not limited by the
    // integer/float range.
    return \implode(
        '',
        \array_map(
            static function (string $byte): string {
                return \chr((int) \bindec($byte));
            },
            \str_split($bits, 8)
        )
    );
}
|
425
|
|
|
|
|
426
|
|
|
/** |
|
427
|
|
|
* Returns the UTF-8 Byte Order Mark Character. |
|
428
|
|
|
* |
|
429
|
|
|
* INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values |
|
430
|
|
|
* |
|
431
|
|
|
* EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code> |
|
432
|
|
|
* |
|
433
|
|
|
* @psalm-pure |
|
434
|
|
|
* |
|
435
|
|
|
* @return string |
|
436
|
|
|
* <p>UTF-8 Byte Order Mark.</p> |
|
437
|
|
|
*/ |
|
438
|
4 |
|
public static function bom(): string
{
    // The three bytes EF BB BF.
    return "\xef\xbb\xbf";
}
|
442
|
|
|
|
|
443
|
|
|
/** |
|
444
|
|
|
* @alias of UTF8::chr_map() |
|
445
|
|
|
* |
|
446
|
|
|
* @param callable $callback |
|
447
|
|
|
* @param string $str |
|
448
|
|
|
* |
|
449
|
|
|
* @psalm-pure |
|
450
|
|
|
* |
|
451
|
|
|
* @return string[] |
|
452
|
|
|
* |
|
453
|
|
|
* @see UTF8::chr_map() |
|
454
|
|
|
*/ |
|
455
|
2 |
|
public static function callback($callback, string $str): array
{
    // Pure alias: everything is delegated to chr_map().
    return self::chr_map($callback, $str);
}
|
459
|
|
|
|
|
460
|
|
|
/** |
|
461
|
|
|
* Returns the character at $index, with indexes starting at 0. |
|
462
|
|
|
* |
|
463
|
|
|
* @param string $str <p>The input string.</p> |
|
464
|
|
|
* @param int $index <p>Position of the character.</p> |
|
465
|
|
|
* @param string $encoding [optional] <p>Default is UTF-8</p> |
|
466
|
|
|
* |
|
467
|
|
|
* @psalm-pure |
|
468
|
|
|
* |
|
469
|
|
|
* @return string |
|
470
|
|
|
* <p>The character at $index.</p> |
|
471
|
|
|
*/ |
|
472
|
9 |
|
public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
{
    // Default encoding goes straight to mbstring; other encodings use the
    // encoding-aware substr() of this class.
    $char = $encoding === 'UTF-8'
        ? \mb_substr($str, $index, 1)
        : self::substr($str, $index, 1, $encoding);

    return (string) $char;
}
|
480
|
|
|
|
|
481
|
|
|
/** |
|
482
|
|
|
* Returns an array consisting of the characters in the string. |
|
483
|
|
|
* |
|
484
|
|
|
* @param string $str <p>The input string.</p> |
|
485
|
|
|
* |
|
486
|
|
|
* @psalm-pure |
|
487
|
|
|
* |
|
488
|
|
|
* @return string[] |
|
489
|
|
|
* <p>An array of chars.</p> |
|
490
|
|
|
*/ |
|
491
|
4 |
|
public static function chars(string $str): array
{
    // str_split() is called without a length argument here.
    /** @var string[] $chars */
    $chars = self::str_split($str);

    return $chars;
}
|
496
|
|
|
|
|
497
|
|
|
/** |
|
498
|
|
|
* This method will auto-detect your server environment for UTF-8 support. |
|
499
|
|
|
* |
|
500
|
|
|
* @return true|null |
|
501
|
|
|
* |
|
502
|
|
|
* @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p> |
|
503
|
|
|
*/ |
|
504
|
4 |
|
public static function checkForSupport()
{
    // The probe runs only once; every later call returns null.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        return null;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
    if (self::$SUPPORT['mbstring'] === true) {
        // Switch mbstring (and its regex functions) to UTF-8.
        \mb_internal_encoding('UTF-8');
        \mb_regex_encoding('UTF-8');
        self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
    }

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
        // The polyfill is given the same internal encoding.
        \mb_internal_encoding('UTF-8');
        self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
    }

    return true;
}
|
551
|
|
|
|
|
552
|
|
|
/** |
|
553
|
|
|
* Generates a UTF-8 encoded character from the given code point. |
|
554
|
|
|
* |
|
555
|
|
|
* INFO: opposite to UTF8::ord() |
|
556
|
|
|
* |
|
557
|
|
|
* EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code> |
|
558
|
|
|
* |
|
559
|
|
|
* @param int $code_point <p>The code point for which to generate a character.</p> |
|
560
|
|
|
* @param string $encoding [optional] <p>Default is UTF-8</p> |
|
561
|
|
|
* |
|
562
|
|
|
* @psalm-pure |
|
563
|
|
|
* |
|
564
|
|
|
* @return string|null |
|
565
|
|
|
* <p>Multi-byte character, returns null on failure or empty input.</p> |
|
566
|
|
|
*/ |
|
567
|
21 |
|
public static function chr($code_point, string $encoding = 'UTF-8')
{
    // Memo of already built characters, keyed by "<code point>_<encoding>".
    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<string,string>
     */
    static $CHAR_CACHE = [];

    // 'UTF-8' and 'CP850' are used as given; every other name is
    // normalised first.
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // Reject non-integers, zero / negative values and everything above the
    // last Unicode code point (U+10FFFF). Without the upper bound the
    // vanilla fallback below would index past the byte table.
    if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
        return null;
    }

    $cache_key = $code_point . '_' . $encoding;
    if (isset($CHAR_CACHE[$cache_key])) {
        return $CHAR_CACHE[$cache_key];
    }

    // Single-byte shortcut, only for U+0001 - U+007F.
    // self::$CHR[n] is the raw byte n (the fallback below assembles lead
    // and continuation bytes from it), so it is a complete UTF-8 character
    // only below 0x80. U+0080 must take the multi-byte route, otherwise the
    // lone continuation byte "\x80" would be returned.
    if ($code_point < 0x80) {
        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $chr = self::$CHR[$code_point];

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }

    //
    // fallback via "IntlChar"
    //

    if (self::$SUPPORT['intlChar'] === true) {
        $chr = \IntlChar::chr($code_point);

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }

    //
    // fallback via vanilla php
    //

    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }

    if ($code_point <= 0x7FF) {
        // two bytes: 110xxxxx 10xxxxxx
        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
    } elseif ($code_point <= 0xFFFF) {
        // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
    } else {
        // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
    }

    if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
    }

    return $CHAR_CACHE[$cache_key] = $chr;
}
|
675
|
|
|
|
|
676
|
|
|
/** |
|
677
|
|
|
* Applies callback to all characters of a string. |
|
678
|
|
|
* |
|
679
|
|
|
* EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code> |
|
680
|
|
|
* |
|
681
|
|
|
* @param callable $callback <p>The callback function.</p> |
|
682
|
|
|
* @param string $str <p>UTF-8 string to run callback on.</p> |
|
683
|
|
|
* |
|
684
|
|
|
* @psalm-pure |
|
685
|
|
|
* |
|
686
|
|
|
* @return string[] |
|
687
|
|
|
* <p>The outcome of the callback, as array.</p> |
|
688
|
|
|
*/ |
|
689
|
2 |
|
public static function chr_map($callback, string $str): array
{
    // Split first, then run the callback once per element of the split.
    $chars = self::str_split($str);

    return \array_map($callback, $chars);
}
|
696
|
|
|
|
|
697
|
|
|
/** |
|
698
|
|
|
* Generates an array of byte length of each character of a Unicode string. |
|
699
|
|
|
* |
|
700
|
|
|
* 1 byte => U+0000 - U+007F |
|
701
|
|
|
* 2 byte => U+0080 - U+07FF |
|
702
|
|
|
* 3 byte => U+0800 - U+FFFF |
|
703
|
|
|
* 4 byte => U+10000 - U+10FFFF |
|
704
|
|
|
* |
|
705
|
|
|
* EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code> |
|
706
|
|
|
* |
|
707
|
|
|
* @param string $str <p>The original unicode string.</p> |
|
708
|
|
|
* |
|
709
|
|
|
* @psalm-pure |
|
710
|
|
|
* |
|
711
|
|
|
* @return int[] |
|
712
|
|
|
* <p>An array of byte lengths of each character.</p> |
|
713
|
|
|
*/ |
|
714
|
4 |
|
public static function chr_size_list(string $str): array
{
    if ($str === '') {
        return [];
    }

    // With mbstring function overloading active, the length is measured
    // via mb_strlen() in the 8-bit charset CP850 instead of strlen().
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $byte_length = static function (string $data): int {
            // "mb_" is available if overload is used, so use it ...
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };
    } else {
        $byte_length = '\strlen';
    }

    return \array_map($byte_length, self::str_split($str));
}
|
732
|
|
|
|
|
733
|
|
|
/** |
|
734
|
|
|
* Get a decimal code representation of a specific character. |
|
735
|
|
|
* |
|
736
|
|
|
* INFO: opposite to UTF8::decimal_to_chr() |
|
737
|
|
|
* |
|
738
|
|
|
* EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code> |
|
739
|
|
|
* |
|
740
|
|
|
* @param string $char <p>The input character.</p> |
|
741
|
|
|
* |
|
742
|
|
|
* @psalm-pure |
|
743
|
|
|
* |
|
744
|
|
|
* @return int |
|
745
|
|
|
*/ |
|
746
|
5 |
|
public static function chr_to_decimal(string $char): int
{
    // NOTE(review): an empty $char is not handled by either path - confirm
    // whether callers can pass one.

    // Preferred path: let iconv produce UCS-4LE and read the first 32 bit
    // little-endian value.
    if (self::$SUPPORT['iconv'] === true) {
        $ucs4 = \iconv('UTF-8', 'UCS-4LE', $char);
        if ($ucs4 !== false) {
            /** @phpstan-ignore-next-line - "unpack": only false if the format string contains errors */
            return \unpack('V', $ucs4)[1];
        }
    }

    // Manual decoding, starting with the lead byte.
    $code = self::ord($char[0]);

    if (!($code & 0x80)) {
        // 0xxxxxxx
        return $code;
    }

    // Sequence length taken from the lead byte; its marker bits are
    // masked off.
    $length = 1;
    if (($code & 0xe0) === 0xc0) {
        // 110xxxxx
        $length = 2;
        $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
        // 1110xxxx
        $length = 3;
        $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
        // 11110xxx
        $length = 4;
        $code &= ~0xf0;
    }

    // Append the payload of every following byte.
    for ($offset = 1; $offset < $length; ++$offset) {
        // 10xxxxxx
        $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $code;
}
|
785
|
|
|
|
|
786
|
|
|
/** |
|
787
|
|
|
* Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character. |
|
788
|
|
|
* |
|
789
|
|
|
* EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code> |
|
790
|
|
|
* |
|
791
|
|
|
* @param int|string $char <p>The input character</p> |
|
792
|
|
|
* @param string $prefix [optional] |
|
793
|
|
|
* |
|
794
|
|
|
* @psalm-pure |
|
795
|
|
|
* |
|
796
|
|
|
* @return string |
|
797
|
|
|
* <p>The code point encoded as U+xxxx.</p> |
|
798
|
|
|
*/ |
|
799
|
2 |
|
public static function chr_to_hex($char, string $prefix = 'U+'): string
{
    if ($char === '') {
        return '';
    }

    // NOTE(review): the literal below is reproduced exactly as it appears
    // in this copy of the file; it may be a mangled rendering of another
    // sequence - verify against the canonical source.
    $input = $char === '�' ? '' : $char;

    $code_point = self::ord((string) $input);

    return self::int_to_hex($code_point, $prefix);
}
|
811
|
|
|
|
|
812
|
|
|
/** |
|
813
|
|
|
* Splits a string into smaller chunks and multiple lines, using the specified line ending character. |
|
814
|
|
|
* |
|
815
|
|
|
* EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code> |
|
816
|
|
|
* |
|
817
|
|
|
* @param string $body <p>The original string to be split.</p> |
|
818
|
|
|
* @param int $chunk_length [optional] <p>The maximum character length of a chunk.</p> |
|
819
|
|
|
* @param string $end [optional] <p>The character(s) to be inserted at the end of each chunk.</p> |
|
820
|
|
|
* |
|
821
|
|
|
* @psalm-pure |
|
822
|
|
|
* |
|
823
|
|
|
* @return string |
|
824
|
|
|
* <p>The chunked string.</p> |
|
825
|
|
|
*/ |
|
826
|
4 |
|
public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
{
    // $end is placed between the chunks only; nothing is appended after
    // the last one.
    $chunks = self::str_split($body, $chunk_length);

    return \implode($end, $chunks);
}
|
830
|
|
|
|
|
831
|
|
|
/** |
|
832
|
|
|
* Accepts a string and removes all non-UTF-8 characters from it + extras if needed. |
|
833
|
|
|
* |
|
834
|
|
|
* EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef …” — 😃 - Düsseldorf'</code> |
|
835
|
|
|
* |
|
836
|
|
|
* @param string $str <p>The string to be sanitized.</p> |
|
837
|
|
|
* @param bool $remove_bom [optional] <p>Set to true, if you need to remove |
|
838
|
|
|
* UTF-BOM.</p> |
|
839
|
|
|
* @param bool $normalize_whitespace [optional] <p>Set to true, if you need to normalize the |
|
840
|
|
|
* whitespace.</p> |
|
841
|
|
|
* @param bool $normalize_msword [optional] <p>Set to true, if you need to normalize MS |
|
842
|
|
|
* Word chars e.g.: "…" |
|
843
|
|
|
* => "..."</p> |
|
844
|
|
|
* @param bool $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, |
|
845
|
|
|
* in |
|
846
|
|
|
* combination with |
|
847
|
|
|
* $normalize_whitespace</p> |
|
848
|
|
|
* @param bool $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond |
|
849
|
|
|
* question mark e.g.: "�"</p> |
|
850
|
|
|
* @param bool $remove_invisible_characters [optional] <p>Set to false, if you do not want to remove |
|
851
|
|
|
* invisible characters e.g.: "\0"</p> |
|
852
|
|
|
* @param bool $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you want to remove |
|
853
|
|
|
* invisible url encoded characters e.g.: "%0B"<br> WARNING: |
|
854
|
|
|
* maybe contains false-positives e.g. aa%0Baa -> aaaa. |
|
855
|
|
|
* </p> |
|
856
|
|
|
* |
|
857
|
|
|
* @psalm-pure |
|
858
|
|
|
* |
|
859
|
|
|
* @return string |
|
860
|
|
|
* <p>A clean UTF-8 encoded string.</p> |
|
861
|
|
|
*/ |
|
862
|
90 |
|
public static function clean(
    string $str,
    bool $remove_bom = false,
    bool $normalize_whitespace = false,
    bool $normalize_msword = false,
    bool $keep_non_breaking_space = false,
    bool $replace_diamond_question_mark = false,
    bool $remove_invisible_characters = true,
    bool $remove_invisible_characters_url_encoded = false
): string {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 collects runs of well-formed byte sequences; groups 2 and 3
    // match a single stray byte. Replacing with '$1' keeps the runs and
    // drops the stray bytes.
    $regex = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = (string) \preg_replace($regex, '$1', $str);

    // The optional steps below run in this fixed order.
    if ($replace_diamond_question_mark) {
        $str = self::replace_diamond_question_mark($str);
    }

    if ($remove_invisible_characters) {
        $str = self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded);
    }

    if ($normalize_whitespace) {
        $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
        $str = self::normalize_msword($str);
    }

    if ($remove_bom) {
        $str = self::remove_bom($str);
    }

    return $str;
}
|
910
|
|
|
|
|
911
|
|
|
/**
 * Clean-up a string: repair simple encoding errors first, then run it through UTF8::clean().
 *
 * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef …” — 😃 - Düsseldorf'</code>
 *
 * @param string $str <p>The input string (it is cast to string before use).</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The cleaned string, or an empty string for empty input.</p>
 */
public static function cleanup($str): string
{
    $input = (string) $str;

    // nothing to do for an empty input
    if ($input === '') {
        return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $input = self::fix_simple_utf8($input);

    // UTF8::clean() is called with:
    //   - remove BOM                      => true
    //   - normalize whitespace            => true
    //   - normalize MS Word chars         => false
    //   - keep non-breaking-spaces        => true
    //   - replace diamond question mark   => true
    // (removing invisible characters, e.g. "\0", stays at its default: true)
    return self::clean($input, true, true, false, true, true);
}
|
948
|
|
|
|
|
949
|
|
|
/**
 * Accepts a string or an array of strings and returns an array of Unicode code points.
 *
 * INFO: opposite to UTF8::string()
 *
 * EXAMPLE: <code>
 * UTF8::codepoints('κöñ'); // array(954, 246, 241)
 * // ... OR ...
 * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
 * </code>
 *
 * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
 * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
 *                                     default, code points will be returned as integers.</p>
 *
 * @psalm-pure
 *
 * @return int[]|string[]
 *                        <p>
 *                        The array of code points:<br>
 *                        int[] for $use_u_style === false<br>
 *                        string[] for $use_u_style === true<br>
 *                        An empty array is returned for an empty or non-array/non-string input.
 *                        </p>
 */
public static function codepoints($arg, bool $use_u_style = false): array
{
    // a string is split into its characters first
    $chars = \is_string($arg) ? self::str_split($arg) : $arg;

    /**
     * @psalm-suppress DocblockTypeContradiction
     */
    if (!\is_array($chars) || $chars === []) {
        return [];
    }

    $code_points = \array_map([self::class, 'ord'], $chars);

    if (!$use_u_style) {
        return $code_points;
    }

    return \array_map([self::class, 'int_to_hex'], $code_points);
}
|
1010
|
|
|
|
|
1011
|
|
|
/**
 * Replaces every run of characters matching "[[:space:]]" with a single
 * space and trims the result.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with trimmed $str and condensed whitespace.</p>
 */
public static function collapse_whitespace(string $str): string
{
    // use mbstring when it is flagged as available, otherwise the regex helper of this class
    $collapsed = self::$SUPPORT['mbstring'] === true
        ? (string) \mb_ereg_replace('[[:space:]]+', ' ', $str)
        : self::regex_replace($str, '[[:space:]]+', ' ');

    return \trim($collapsed);
}
|
1031
|
|
|
|
|
1032
|
|
|
/**
 * Returns count of characters used in a string.
 *
 * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
 *
 * @param string $str                     <p>The input string.</p>
 * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param bool   $try_to_use_mb_functions [optional] <p>Passed through to UTF8::str_split();
 *                                        presumably set to false, if you don't want to use the "mb_*" functions.</p>
 *
 * @psalm-pure
 *
 * @return int[]
 *               <p>An associative array of Character as keys and
 *               their count as values.</p>
 */
public static function count_chars(
    string $str,
    bool $clean_utf8 = false,
    bool $try_to_use_mb_functions = true
): array {
    // split into chunks of one character each ...
    $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

    // ... and count how often each one occurs
    return \array_count_values($chars);
}
|
1061
|
|
|
|
|
1062
|
|
|
/**
 * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
 *
 * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
 *
 * copy&past from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
 *
 * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
 * @param string[] $filter     <p>Map of "search => replacement" applied before invalid characters are stripped.</p>
 * @param bool     $strip_tags <p>When true, strip_tags() is applied to the string.</p>
 * @param bool     $strtolower <p>When true, strtolower() is applied to the string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @phpstan-param array<string,string> $filter
 */
public static function css_identifier(
    string $str = '',
    array $filter = [
        ' ' => '-',
        '/' => '-',
        '[' => '',
        ']' => '',
    ],
    bool $strip_tags = false,
    bool $strtolower = true
): string {
    // Fallback: generate an identifier when nothing usable was given,
    // otherwise sanitize the given one.
    $str = \trim($str) === ''
        ? \uniqid('auto-generated-css-class', true)
        : self::clean($str);

    if ($strip_tags) {
        $str = \strip_tags($str);
    }

    if ($strtolower) {
        $str = \strtolower($str);
    }

    // We could also use strtr() here but its much slower than str_replace(). In
    // order to keep '__' to stay '__' we first replace it with a different
    // placeholder after checking that it is not defined as a filter.
    $placeholder_hits = 0;
    if (!isset($filter['__'])) {
        $str = \str_replace('__', '##', $str, $placeholder_hits);
    }

    $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

    // Replace temporary placeholder '##' with '__' only if the original
    // string contained '__'.
    if ($placeholder_hits > 0) {
        $str = \str_replace('##', '__', $str);
    }

    // Valid characters in a CSS identifier are:
    // - the hyphen (U+002D)
    // - 0-9 (U+0030 - U+0039)
    // - A-Z (U+0041 - U+005A)
    // - the underscore (U+005F)
    // - a-z (U+0061 - U+007A)
    // - ISO 10646 characters U+00A1 and higher
    // We strip out any character not in the above list.
    $str = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $str);

    // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
    $invalid_starts = ['/^[0-9]/', '/^(-[0-9])|^(--)/'];
    $start_replacements = ['_', '__'];
    $str = (string) \preg_replace($invalid_starts, $start_replacements, $str);

    return \trim($str, '-');
}
|
1136
|
|
|
|
|
1137
|
|
|
/**
 * Remove css media-queries.
 *
 * @param string $str <p>The CSS input.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The input without the "@media ... { ... }" blocks matched by the pattern below.</p>
 */
public static function css_stripe_media_queries(string $str): string
{
    // NOTE: the "U" modifier inverts the greediness of all quantifiers in this pattern.
    $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

    return (string) \preg_replace($media_query_pattern, '', $str);
}
|
1154
|
|
|
|
|
1155
|
|
|
/**
 * Checks whether the "ctype" extension is loaded.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
 *
 * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
 */
public static function ctype_loaded(): bool
{
    $is_loaded = \extension_loaded('ctype');

    return $is_loaded;
}
|
1169
|
|
|
|
|
1170
|
|
|
/**
 * Converts an int value into a UTF-8 character.
 *
 * INFO: opposite to UTF8::string()
 *
 * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
 *
 * @param int|string $int <p>The decimal code point.</p>
 *
 * @phpstan-param int|numeric-string $int
 *
 * @psalm-pure
 *
 * @return string
 */
public static function decimal_to_chr($int): string
{
    // Build the numeric character reference, e.g. "&#931;" ...
    $entity = '&#' . $int . ';';

    // ... and let mbstring decode it. We cannot use html_entity_decode() here,
    // as it will not return characters for many values < 160.
    //
    // NOTE(review): the PHP manual marks the "HTML-ENTITIES" encoding as
    // deprecated since PHP 8.2 - confirm the supported PHP versions.
    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
}
|
1191
|
|
|
|
|
1192
|
|
|
/**
 * Decodes a MIME header field
 *
 * @param string $str      <p>The encoded header field.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>A decoded MIME field on success,
 *                      or false if an error occurs during the decoding.</p>
 */
public static function decode_mimeheader($str, string $encoding = 'UTF-8')
{
    // the two most common names are used as-is, everything else gets normalized first
    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // always fallback via symfony polyfill
    return \iconv_mime_decode(
        $str,
        \ICONV_MIME_DECODE_CONTINUE_ON_ERROR,
        $encoding
    );
}
|
1213
|
|
|
|
|
1214
|
|
|
/**
 * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
 *
 * Each letter (A-Z) is mapped onto the code point with the same offset from
 * U+1F1E6; the two resulting characters are concatenated.
 *
 * @see https://en.wikipedia.org/wiki/ISO_3166-1
 *
 * @param string $country_code_iso_3166_1 <p>e.g. DE (case-insensitive)</p>
 *
 * @return string
 *                <p>Emoji or empty string on error, i.e. whenever the input
 *                is not exactly two ASCII letters.</p>
 */
public static function emoji_from_country_code(string $country_code_iso_3166_1): string
{
    // Only two ASCII letters can be mapped. Anything else (empty input, wrong
    // length, digits, multi-byte characters) is an error; without this check
    // the byte-wise indexing below would produce unrelated code points.
    if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
        return '';
    }

    $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

    $flagOffset = 0x1F1E6; // code point that the letter "A" is mapped to
    $asciiOffset = 0x41; // "A"

    // The input is validated ASCII, so the byte value is the code point.
    $first = \ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset;
    $second = \ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset;

    return (self::chr($first) ?? '') . (self::chr($second) ?? '');
}
|
1242
|
|
|
|
|
1243
|
|
|
/**
 * Decodes a string which was encoded by "UTF8::emoji_encode()".
 *
 * INFO: opposite to UTF8::emoji_encode()
 *
 * EXAMPLE: <code>
 * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
 * //
 * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
 * </code>
 *
 * @param string $str                            <p>The input string.</p>
 * @param bool   $use_reversible_string_mappings [optional] <p>
 *                                               When <b>TRUE</b>, we use a reversible string mapping
 *                                               between "emoji_encode" and "emoji_decode".</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function emoji_decode(
    string $str,
    bool $use_reversible_string_mappings = false
): string {
    // lazy-load the emoji tables on first use
    if (self::$EMOJI_KEYS_CACHE === null) {
        /** @phpstan-ignore-next-line - we need to load the data first */
        self::initEmojiData();
    }

    // choose which set of encoded names is searched for
    $encoded_names = $use_reversible_string_mappings
        ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
        : self::$EMOJI_KEYS_CACHE;

    return (string) \str_replace(
        (array) $encoded_names,
        (array) self::$EMOJI_VALUES_CACHE,
        $str
    );
}
|
1286
|
|
|
|
|
1287
|
|
|
/**
 * Encode a string with emoji chars into a non-emoji string.
 *
 * INFO: opposite to UTF8::emoji_decode()
 *
 * EXAMPLE: <code>
 * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
 * //
 * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
 * </code>
 *
 * @param string $str                            <p>The input string</p>
 * @param bool   $use_reversible_string_mappings [optional] <p>
 *                                               when <b>TRUE</b>, we use a reversible string mapping
 *                                               between "emoji_encode" and "emoji_decode"</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function emoji_encode(
    string $str,
    bool $use_reversible_string_mappings = false
): string {
    // lazy-load the emoji tables on first use
    if (self::$EMOJI_KEYS_CACHE === null) {
        /** @phpstan-ignore-next-line - we need to load the data first */
        self::initEmojiData();
    }

    // choose which set of names the emoji chars are replaced with
    $encoded_names = $use_reversible_string_mappings
        ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
        : self::$EMOJI_KEYS_CACHE;

    return (string) \str_replace(
        (array) self::$EMOJI_VALUES_CACHE,
        (array) $encoded_names,
        $str
    );
}
|
1330
|
|
|
|
|
1331
|
|
|
/**
 * Encode a string with a new charset-encoding.
 *
 * INFO:  This function will also try to fix broken / double encoding,
 *        so you can call this function also on a UTF-8 string and you don't mess up the string.
 *
 * EXAMPLE: <code>
 * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
 * //
 * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
 * //
 * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
 * //
 * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
 * </code>
 *
 * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
 * @param string $str                           <p>The input string</p>
 * @param bool   $auto_detect_the_from_encoding [optional] <p>When true (default), the current string-encoding
 *                                              is auto-detected and a detected encoding overrides
 *                                              $from_encoding.<br> Set to false to rely on the given
 *                                              $from_encoding.</p>
 * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
 *                                              A empty string will trigger the autodetect anyway.</p>
 *
 * @psalm-pure
 *
 * @throws \InvalidArgumentException <p>If $to_encoding is "JSON" and the input can not be JSON-encoded.</p>
 *
 * @return string
 *                <p>The converted string; the input is returned unchanged when no
 *                conversion is needed or the conversion fails.</p>
 *
 * @psalm-suppress InvalidReturnStatement
 */
public static function encode(
    string $to_encoding,
    string $str,
    bool $auto_detect_the_from_encoding = true,
    string $from_encoding = ''
): string {
    if ($str === '' || $to_encoding === '') {
        return $str;
    }

    if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
        $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
    }

    if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
        $from_encoding = self::normalize_encoding($from_encoding);
    }

    if (
        $to_encoding
        &&
        $from_encoding
        &&
        $from_encoding === $to_encoding
    ) {
        return $str;
    }

    if ($to_encoding === 'JSON') {
        $return = self::json_encode($str);
        if ($return === false) {
            throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
        }

        return $return;
    }
    if ($from_encoding === 'JSON') {
        // Only a decoded JSON *string* can be processed any further; for
        // everything else (invalid JSON, arrays, objects, numbers, ...) the
        // input is kept as it is instead of leaking a non-string into the
        // string-typed code below.
        $json_decoded = self::json_decode($str);
        if (\is_string($json_decoded)) {
            $str = $json_decoded;
        }
        $from_encoding = '';
    }

    if ($to_encoding === 'BASE64') {
        return \base64_encode($str);
    }
    if ($from_encoding === 'BASE64') {
        // base64_decode() returns false in strict mode for invalid input,
        // in that case the input is kept as it is.
        $base64_decoded = \base64_decode($str, true);
        if ($base64_decoded !== false) {
            $str = $base64_decoded;
        }
        $from_encoding = '';
    }

    if ($to_encoding === 'HTML-ENTITIES') {
        return self::html_encode($str, true);
    }
    if ($from_encoding === 'HTML-ENTITIES') {
        $str = self::html_entity_decode($str, \ENT_COMPAT);
        $from_encoding = '';
    }

    $from_encoding_auto_detected = false;
    if (
        $auto_detect_the_from_encoding
        ||
        !$from_encoding
    ) {
        $from_encoding_auto_detected = self::str_detect_encoding($str);
    }

    // DEBUG
    //var_dump($to_encoding, $from_encoding, $from_encoding_auto_detected, $str, "\n\n");

    if ($from_encoding_auto_detected !== false) {
        $from_encoding = $from_encoding_auto_detected;
    } elseif ($auto_detect_the_from_encoding) {
        // fallback for the "autodetect"-mode
        return self::to_utf8($str);
    }

    if (
        !$from_encoding
        ||
        $from_encoding === $to_encoding
    ) {
        return $str;
    }

    if (
        $to_encoding === 'UTF-8'
        &&
        (
            $from_encoding === 'WINDOWS-1252'
            ||
            $from_encoding === 'ISO-8859-1'
        )
    ) {
        return self::to_utf8($str);
    }

    if (
        $to_encoding === 'ISO-8859-1'
        &&
        (
            $from_encoding === 'WINDOWS-1252'
            ||
            $from_encoding === 'UTF-8'
        )
    ) {
        return self::to_iso8859($str);
    }

    if (
        $to_encoding !== 'UTF-8'
        &&
        $to_encoding !== 'ISO-8859-1'
        &&
        $to_encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - is is only a warning
         */
        \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
        $str_encoded = \mb_convert_encoding(
            $str,
            $to_encoding,
            $from_encoding
        );

        if ($str_encoded) {
            \assert(\is_string($str_encoded));

            return $str_encoded;
        }
    }

    // last resort: iconv, and the unchanged input if that fails as well

    /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
    $return = @\iconv($from_encoding, $to_encoding, $str);
    if ($return !== false) {
        return $return;
    }

    return $str;
}
|
1506
|
|
|
|
|
1507
|
|
|
/**
 * Encodes a string as a MIME header field via iconv_mime_encode() (with an empty field name).
 *
 * @param string $str
 * @param string $from_charset      [optional] <p>Set the input charset.</p>
 * @param string $to_charset        [optional] <p>Set the output charset.</p>
 * @param string $transfer_encoding [optional] <p>Set the transfer encoding (passed as "scheme").</p>
 * @param string $linefeed          [optional] <p>Set the used linefeed (passed as "line-break-chars").</p>
 * @param int    $indent            [optional] <p>Set the max length indent (passed as "line-length").</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>An encoded MIME field on success,
 *                      or false if an error occurs during the encoding.</p>
 */
public static function encode_mimeheader(
    string $str,
    string $from_charset = 'UTF-8',
    string $to_charset = 'UTF-8',
    string $transfer_encoding = 'Q',
    string $linefeed = "\r\n",
    int $indent = 76
) {
    // the two most common names are used as-is, everything else gets normalized first
    $known_charsets = ['UTF-8', 'CP850'];

    if (!\in_array($from_charset, $known_charsets, true)) {
        $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
    }

    if (!\in_array($to_charset, $known_charsets, true)) {
        $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
    }

    $preferences = [
        'scheme'           => $transfer_encoding,
        'line-length'      => $indent,
        'input-charset'    => $from_charset,
        'output-charset'   => $to_charset,
        'line-break-chars' => $linefeed,
    ];

    // always fallback via symfony polyfill
    return \iconv_mime_encode('', $str, $preferences);
}
|
1550
|
|
|
|
|
1551
|
|
|
/**
 * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
 *
 * The extract is cut at the nearest " " / "." boundaries and the skipped
 * text is marked with $replacer_for_skipped_text.
 *
 * @param string   $str                       <p>The input string.</p>
 * @param string   $search                    <p>The searched string.</p>
 * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
 * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
 * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function extract_text(
    string $str,
    string $search = '',
    int $length = null,
    string $replacer_for_skipped_text = '…',
    string $encoding = 'UTF-8'
): string {
    if ($str === '') {
        return '';
    }

    if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    if ($length === null) {
        $length = (int) \round((int) self::strlen($str, $encoding) / 2);
    }

    // For "UTF-8" the native "mb_*" functions are called directly, for every
    // other encoding the wrappers of this class are used.
    $is_utf8 = $encoding === 'UTF-8';

    // character count of a string
    $char_count = static function ($text) use ($is_utf8, $encoding) {
        return $is_utf8 ? \mb_strlen($text) : self::strlen($text, $encoding);
    };

    // substring of a string
    $slice = static function ($text, $start, $size) use ($is_utf8, $encoding) {
        return $is_utf8
            ? \mb_substr($text, $start, $size)
            : self::substr($text, $start, $size, $encoding);
    };

    // min() of the next " " and the next "." position in $str, starting at $from
    // (the result is 0 as soon as one of both is not found)
    $next_boundary = static function ($from) use ($str, $is_utf8, $encoding): int {
        if ($is_utf8) {
            return (int) \min(
                \mb_strpos($str, ' ', $from),
                \mb_strpos($str, '.', $from)
            );
        }

        return (int) \min(
            self::strpos($str, ' ', $from, $encoding),
            self::strpos($str, '.', $from, $encoding)
        );
    };

    // --- no search-string: cut the text from the beginning

    if ($search === '') {
        $end = 0;
        if ($length > 0) {
            $string_length = (int) $char_count($str);
            $end = ($length - 1) <= $string_length ? ($length - 1) : $string_length;
        }

        $pos = $next_boundary($end);
        if (!$pos) {
            return $str;
        }

        $str_sub = $slice($str, 0, $pos);
        if ($str_sub === false) {
            return '';
        }

        return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }

    // --- with search-string: locate it and the start of the extract

    if ($is_utf8) {
        $word_position = (int) \mb_stripos($str, $search);
    } else {
        $word_position = (int) self::stripos($str, $search, 0, $encoding);
    }
    $half_side = (int) ($word_position - $length / 2 + (int) $char_count($search) / 2);

    $pos_start = 0;
    if ($half_side > 0) {
        $half_text = $slice($str, 0, $half_side);
        if ($half_text !== false) {
            if ($is_utf8) {
                $pos_start = (int) \max(
                    \mb_strrpos($half_text, ' '),
                    \mb_strrpos($half_text, '.')
                );
            } else {
                $pos_start = (int) \max(
                    self::strrpos($half_text, ' ', 0, $encoding),
                    self::strrpos($half_text, '.', 0, $encoding)
                );
            }
        }
    }

    $starts_inside_text = $word_position && $half_side > 0;

    // where to look for the end of the extract (never behind the end of the string)
    $offset = $starts_inside_text ? $pos_start + $length - 1 : $length - 1;
    $total_length = (int) self::strlen($str, $encoding);
    if ($offset > $total_length) {
        $offset = $total_length;
    }

    $boundary = $next_boundary($offset);

    // --- the extract starts at the beginning of the text

    if (!$starts_inside_text) {
        if (!$boundary) {
            return $str;
        }

        $str_sub = $slice($str, 0, $boundary);
        if ($str_sub === false) {
            return '';
        }

        return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }

    // --- the extract starts somewhere inside of the text

    $pos_end = $boundary - $pos_start;

    if ($pos_end <= 0) {
        // no usable end found: take everything from the start position on
        $str_sub = $slice($str, $pos_start, (int) $char_count($str));
        if ($str_sub === false) {
            return '';
        }

        return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
    }

    $str_sub = $slice($str, $pos_start, $pos_end);
    if ($str_sub === false) {
        return '';
    }

    return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
}
|
1740
|
|
|
|
|
1741
|
|
|
/** |
|
1742
|
|
|
* Reads entire file into a string. |
|
1743
|
|
|
* |
|
1744
|
|
|
* EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code> |
|
1745
|
|
|
* |
|
1746
|
|
|
* WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!! |
|
1747
|
|
|
* |
|
1748
|
|
|
* @see http://php.net/manual/en/function.file-get-contents.php |
|
1749
|
|
|
* |
|
1750
|
|
|
* @param string $filename <p> |
|
1751
|
|
|
* Name of the file to read. |
|
1752
|
|
|
* </p> |
|
1753
|
|
|
* @param bool $use_include_path [optional] <p> |
|
1754
|
|
|
* Prior to PHP 5, this parameter is called |
|
1755
|
|
|
* use_include_path and is a bool. |
|
1756
|
|
|
* As of PHP 5 the FILE_USE_INCLUDE_PATH can be used |
|
1757
|
|
|
* to trigger include path |
|
1758
|
|
|
* search. |
|
1759
|
|
|
* </p> |
|
1760
|
|
|
* @param resource|null $context [optional] <p> |
|
1761
|
|
|
* A valid context resource created with |
|
1762
|
|
|
* stream_context_create. If you don't need to use a |
|
1763
|
|
|
* custom context, you can skip this parameter by passing null. |
|
1764
|
|
|
* </p> |
|
1765
|
|
|
* @param int|null $offset [optional] <p> |
|
1766
|
|
|
* The offset where the reading starts. |
|
1767
|
|
|
* </p> |
|
1768
|
|
|
* @param int|null $max_length [optional] <p> |
|
1769
|
|
|
* Maximum length of data read. The default is to read until end |
|
1770
|
|
|
* of file is reached. |
|
1771
|
|
|
* </p> |
|
1772
|
|
|
* @param int $timeout <p>The time in seconds for the timeout.</p> |
|
1773
|
|
|
* @param bool $convert_to_utf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for |
|
1774
|
|
|
* some files, because they used non default utf-8 chars. Binary files |
|
1775
|
|
|
* like images or pdf will not be converted.</p> |
|
1776
|
|
|
* @param string $from_encoding [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br> |
|
1777
|
|
|
* A empty string will trigger the autodetect anyway.</p> |
|
1778
|
|
|
* |
|
1779
|
|
|
* @psalm-pure |
|
1780
|
|
|
* |
|
1781
|
|
|
* @return false|string |
|
1782
|
|
|
* <p>The function returns the read data as string or <b>false</b> on failure.</p> |
|
1783
|
|
|
*/ |
|
1784
|
12 |
|
public static function file_get_contents(
    string $filename,
    bool $use_include_path = false,
    $context = null,
    int $offset = null,
    int $max_length = null,
    int $timeout = 10,
    bool $convert_to_utf8 = true,
    string $from_encoding = ''
) {
    // Run the file name through the sanitizing polyfill; a rejected name
    // is reported to the caller exactly like a failed read.
    /** @noinspection CallableParameterUseCaseInTypeContextInspection - is ok here */
    $filename = Bootup::filter_sanitize_string_polyfill($filename);
    if ($filename === false) {
        return false;
    }

    // Build a default stream context carrying the HTTP timeout, but only
    // when the caller did not bring a context of their own.
    if ($context === null && $timeout) {
        $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
    }

    $start = $offset ?? 0;

    if (\is_int($max_length)) {
        // Negative lengths are clamped to zero before reading.
        $data = \file_get_contents($filename, $use_include_path, $context, $start, \max(0, $max_length));
    } else {
        // No length given: read from $start to the end of the file.
        $data = \file_get_contents($filename, $use_include_path, $context, $start);
    }

    // return false on error
    if ($data === false) {
        return false;
    }

    if (!$convert_to_utf8) {
        return $data;
    }

    // Convert when the binary check does not flag the data, or when the
    // UTF-16 / UTF-32 checks recognise it despite that.
    $should_convert = !self::is_binary($data, true)
        || self::is_utf16($data, false) !== false
        || self::is_utf32($data, false) !== false;

    if ($should_convert) {
        $data = self::cleanup(
            self::encode('UTF-8', $data, false, $from_encoding)
        );
    }

    return $data;
}
|
1846
|
|
|
|
|
1847
|
|
|
/** |
|
1848
|
|
|
* Checks if a file starts with BOM (Byte Order Mark) character. |
|
1849
|
|
|
* |
|
1850
|
|
|
* EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code> |
|
1851
|
|
|
* |
|
1852
|
|
|
* @param string $file_path <p>Path to a valid file.</p> |
|
1853
|
|
|
* |
|
1854
|
|
|
* @throws \RuntimeException if file_get_contents() returned false |
|
1855
|
|
|
* |
|
1856
|
|
|
* @return bool |
|
1857
|
|
|
* <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p> |
|
1858
|
|
|
* |
|
1859
|
|
|
* @psalm-pure |
|
1860
|
|
|
*/ |
|
1861
|
2 |
|
public static function file_has_bom(string $file_path): bool
{
    $raw_content = \file_get_contents($file_path);

    // Happy path first: hand the raw bytes to the string-level BOM check.
    if ($raw_content !== false) {
        return self::string_has_bom($raw_content);
    }

    // The read failed, so there is nothing to inspect.
    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
}
|
1870
|
|
|
|
|
1871
|
|
|
/** |
|
1872
|
|
|
* Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
|
1873
|
|
|
* |
|
1874
|
|
|
* EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code> |
|
1875
|
|
|
* |
|
1876
|
|
|
* @param array|object|string $var |
|
1877
|
|
|
* @param int $normalization_form |
|
1878
|
|
|
* @param string $leading_combining |
|
1879
|
|
|
* |
|
1880
|
|
|
* @psalm-pure |
|
1881
|
|
|
* |
|
1882
|
|
|
* @return mixed |
|
1883
|
|
|
* |
|
1884
|
|
|
* @template TFilter |
|
1885
|
|
|
* @phpstan-param TFilter $var |
|
1886
|
|
|
* @phpstan-return TFilter |
|
1887
|
|
|
*/ |
|
1888
|
64 |
|
public static function filter(
    $var,
    int $normalization_form = \Normalizer::NFC,
    string $leading_combining = '◌'
) {
    $type = \gettype($var);

    if ($type === 'object' || $type === 'array') {
        // Containers: filter every element / property in place, recursively.
        foreach ($var as &$item) {
            $item = self::filter($item, $normalization_form, $leading_combining);
        }
        unset($item);
    } elseif ($type === 'string') {
        if (\strpos($var, "\r") !== false) {
            $var = self::normalize_line_ending($var);
        }

        // Pure ASCII needs no normalization at all.
        if (!ASCII::is_ascii($var)) {
            // '-' is only a truthy, non-empty marker meaning
            // "the input was already in the requested form".
            $normalized = '-';

            if (!\Normalizer::isNormalized($var, $normalization_form)) {
                $normalized = \Normalizer::normalize($var, $normalization_form);

                // Use the normalizer result when it is usable; otherwise
                // fall back to re-encoding the string as UTF-8.
                $var = ($normalized && isset($normalized[0]))
                    ? $normalized
                    : self::encode('UTF-8', $var);
            }

            \assert(\is_string($var));

            $starts_with_combining_mark = $normalized
                && $var[0] >= "\x80"
                && isset($normalized[0], $leading_combining[0])
                && \preg_match('/^\\p{Mn}/u', $var);

            if ($starts_with_combining_mark) {
                // Prevent leading combining chars
                // for NFC-safe concatenations.
                $var = $leading_combining . $var;
            }
        }
    }
    // every other type: nothing to do

    /** @noinspection PhpSillyAssignmentInspection */
    /** @phpstan-var TFilter $var */
    $var = $var;

    return $var;
}
|
1948
|
|
|
|
|
1949
|
|
|
/** |
|
1950
|
|
|
* "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
|
1951
|
|
|
* |
|
1952
|
|
|
* Gets a specific external variable by name and optionally filters it. |
|
1953
|
|
|
* |
|
1954
|
|
|
* EXAMPLE: <code> |
|
1955
|
|
|
* // _GET['foo'] = 'bar'; |
|
1956
|
|
|
* UTF8::filter_input(INPUT_GET, 'foo', FILTER_UNSAFE_RAW); // 'bar' |
|
1957
|
|
|
* </code> |
|
1958
|
|
|
* |
|
1959
|
|
|
* @see http://php.net/manual/en/function.filter-input.php |
|
1960
|
|
|
* |
|
1961
|
|
|
* @param int $type <p> |
|
1962
|
|
|
* One of <b>INPUT_GET</b>, <b>INPUT_POST</b>, |
|
1963
|
|
|
* <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or |
|
1964
|
|
|
* <b>INPUT_ENV</b>. |
|
1965
|
|
|
* </p> |
|
1966
|
|
|
* @param string $variable_name <p> |
|
1967
|
|
|
* Name of a variable to get. |
|
1968
|
|
|
* </p> |
|
1969
|
|
|
* @param int $filter [optional] <p> |
|
1970
|
|
|
* The ID of the filter to apply. The |
|
1971
|
|
|
* manual page lists the available filters. |
|
1972
|
|
|
* </p> |
|
1973
|
|
|
* @param int|int[]|null $options [optional] <p> |
|
1974
|
|
|
* Associative array of options or bitwise disjunction of flags. If filter |
|
1975
|
|
|
* accepts options, flags can be provided in "flags" field of array. |
|
1976
|
|
|
* </p> |
|
1977
|
|
|
* |
|
1978
|
|
|
* @psalm-pure |
|
1979
|
|
|
* |
|
1980
|
|
|
* @return mixed |
|
1981
|
|
|
* <p> |
|
1982
|
|
|
* Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the |
|
1983
|
|
|
* <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it |
|
1984
|
|
|
* returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails. |
|
1985
|
|
|
* </p> |
|
1986
|
|
|
*/ |
|
1987
|
1 |
|
public static function filter_input(
    int $type,
    string $variable_name,
    int $filter = \FILTER_DEFAULT,
    $options = null
) {
    /**
     * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
     */
    $forward_options = $options !== null && \func_num_args() >= 4;

    // Only hand $options to the native function when the caller really
    // supplied a non-null value; otherwise let PHP apply its own default.
    $var = $forward_options
        ? \filter_input($type, $variable_name, $filter, $options)
        : \filter_input($type, $variable_name, $filter);

    return self::filter($var);
}
|
2004
|
|
|
|
|
2005
|
|
|
/** |
|
2006
|
|
|
* "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
|
2007
|
|
|
* |
|
2008
|
|
|
* Gets external variables and optionally filters them. |
|
2009
|
|
|
* |
|
2010
|
|
|
* EXAMPLE: <code> |
|
2011
|
|
|
* // _GET['foo'] = 'bar'; |
|
2012
|
|
|
* UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_UNSAFE_RAW)); // array('foo' => 'bar') |
|
2013
|
|
|
* </code> |
|
2014
|
|
|
* |
|
2015
|
|
|
* @see http://php.net/manual/en/function.filter-input-array.php |
|
2016
|
|
|
* |
|
2017
|
|
|
* @param int $type <p> |
|
2018
|
|
|
* One of <b>INPUT_GET</b>, <b>INPUT_POST</b>, |
|
2019
|
|
|
* <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or |
|
2020
|
|
|
* <b>INPUT_ENV</b>. |
|
2021
|
|
|
* </p> |
|
2022
|
|
|
* @param array|null $definition [optional] <p> |
|
2023
|
|
|
* An array defining the arguments. A valid key is a string |
|
2024
|
|
|
* containing a variable name and a valid value is either a filter type, or an array |
|
2025
|
|
|
* optionally specifying the filter, flags and options. If the value is an |
|
2026
|
|
|
* array, valid keys are filter which specifies the |
|
2027
|
|
|
* filter type, |
|
2028
|
|
|
* flags which specifies any flags that apply to the |
|
2029
|
|
|
* filter, and options which specifies any options that |
|
2030
|
|
|
* apply to the filter. See the example below for a better understanding. |
|
2031
|
|
|
* </p> |
|
2032
|
|
|
* <p> |
|
2033
|
|
|
* This parameter can be also an integer holding a filter constant. Then all values in the |
|
2034
|
|
|
* input array are filtered by this filter. |
|
2035
|
|
|
* </p> |
|
2036
|
|
|
* @param bool $add_empty [optional] <p> |
|
2037
|
|
|
* Add missing keys as <b>NULL</b> to the return value. |
|
2038
|
|
|
* </p> |
|
2039
|
|
|
* |
|
2040
|
|
|
* @psalm-pure |
|
2041
|
|
|
* |
|
2042
|
|
|
* @return mixed |
|
2043
|
|
|
* <p> |
|
2044
|
|
|
* An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. |
|
2045
|
|
|
* An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not |
|
2046
|
|
|
* set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable |
|
2047
|
|
|
* is not set and <b>NULL</b> if the filter fails. |
|
2048
|
|
|
* </p> |
|
2049
|
|
|
*/ |
|
2050
|
1 |
|
public static function filter_input_array(
    int $type,
    $definition = null,
    bool $add_empty = true
) {
    /**
     * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
     */
    $forward_definition = $definition !== null && \func_num_args() >= 2;

    // Without a usable definition the native default filter is applied to
    // the whole input array; otherwise both extra arguments are forwarded.
    $a = $forward_definition
        ? \filter_input_array($type, $definition, $add_empty)
        : \filter_input_array($type);

    return self::filter($a);
}
|
2066
|
|
|
|
|
2067
|
|
|
/** |
|
2068
|
|
|
* "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
|
2069
|
|
|
* |
|
2070
|
|
|
* Filters a variable with a specified filter. |
|
2071
|
|
|
* |
|
2072
|
|
|
* EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code> |
|
2073
|
|
|
* |
|
2074
|
|
|
* @see http://php.net/manual/en/function.filter-var.php |
|
2075
|
|
|
* |
|
2076
|
|
|
* @param float|int|string|null $variable <p> |
|
2077
|
|
|
* Value to filter. |
|
2078
|
|
|
* </p> |
|
2079
|
|
|
* @param int $filter [optional] <p> |
|
2080
|
|
|
* The ID of the filter to apply. The |
|
2081
|
|
|
* manual page lists the available filters. |
|
2082
|
|
|
* </p> |
|
2083
|
|
|
* @param int|int[]|null $options [optional] <p> |
|
2084
|
|
|
* Associative array of options or bitwise disjunction of flags. If filter |
|
2085
|
|
|
* accepts options, flags can be provided in "flags" field of array. For |
|
2086
|
|
|
* the "callback" filter, callable type should be passed. The |
|
2087
|
|
|
* callback must accept one argument, the value to be filtered, and return |
|
2088
|
|
|
* the value after filtering/sanitizing it. |
|
2089
|
|
|
* </p> |
|
2090
|
|
|
* <p> |
|
2091
|
|
|
* <code> |
|
2092
|
|
|
* // for filters that accept options, use this format |
|
2093
|
|
|
* $options = array( |
|
2094
|
|
|
* 'options' => array( |
|
2095
|
|
|
* 'default' => 3, // value to return if the filter fails |
|
2096
|
|
|
* // other options here |
|
2097
|
|
|
* 'min_range' => 0 |
|
2098
|
|
|
* ), |
|
2099
|
|
|
* 'flags' => FILTER_FLAG_ALLOW_OCTAL, |
|
2100
|
|
|
* ); |
|
2101
|
|
|
* $var = filter_var('0755', FILTER_VALIDATE_INT, $options); |
|
2102
|
|
|
* // for filter that only accept flags, you can pass them directly |
|
2103
|
|
|
* $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE); |
|
2104
|
|
|
* // for filter that only accept flags, you can also pass as an array |
|
2105
|
|
|
* $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, |
|
2106
|
|
|
* array('flags' => FILTER_NULL_ON_FAILURE)); |
|
2107
|
|
|
* // callback validate filter |
|
2108
|
|
|
* function foo($value) |
|
2109
|
|
|
* { |
|
2110
|
|
|
* // Expected format: Surname, GivenNames |
|
2111
|
|
|
* if (strpos($value, ", ") === false) return false; |
|
2112
|
|
|
* list($surname, $givennames) = explode(", ", $value, 2); |
|
2113
|
|
|
* $empty = (empty($surname) || empty($givennames)); |
|
2114
|
|
|
* $notstrings = (!is_string($surname) || !is_string($givennames)); |
|
2115
|
|
|
* if ($empty || $notstrings) { |
|
2116
|
|
|
* return false; |
|
2117
|
|
|
* } else { |
|
2118
|
|
|
* return $value; |
|
2119
|
|
|
* } |
|
2120
|
|
|
* } |
|
2121
|
|
|
* $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo')); |
|
2122
|
|
|
* </code> |
|
2123
|
|
|
* </p> |
|
2124
|
|
|
* |
|
2125
|
|
|
* @psalm-pure |
|
2126
|
|
|
* |
|
2127
|
|
|
* @return mixed |
|
2128
|
|
|
* <p>The filtered data, or <b>FALSE</b> if the filter fails.</p> |
|
2129
|
|
|
*/ |
|
2130
|
2 |
|
public static function filter_var(
    $variable,
    int $filter = \FILTER_DEFAULT,
    $options = null
) {
    /**
     * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
     */
    if ($options === null || \func_num_args() < 3) {
        // No usable options: call the native function without the third
        // argument. An explicit null must not be forwarded, because with
        // strict_types on PHP 8 the native "array|int $options" parameter
        // rejects null with a TypeError. This mirrors filter_input().
        $variable = \filter_var($variable, $filter);
    } else {
        $variable = \filter_var($variable, $filter, $options);
    }

    // Normalize whatever the native filter produced.
    return self::filter($variable);
}
|
2146
|
|
|
|
|
2147
|
|
|
/** |
|
2148
|
|
|
* "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
|
2149
|
|
|
* |
|
2150
|
|
|
* Gets multiple variables and optionally filters them. |
|
2151
|
|
|
* |
|
2152
|
|
|
* EXAMPLE: <code> |
|
2153
|
|
|
* $filters = [ |
|
2154
|
|
|
* 'name' => ['filter' => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']], |
|
2155
|
|
|
* 'age' => ['filter' => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]], |
|
2156
|
|
|
* 'email' => FILTER_VALIDATE_EMAIL, |
|
2157
|
|
|
* ]; |
|
2158
|
|
|
* |
|
2159
|
|
|
* $data = [ |
|
2160
|
|
|
* 'name' => 'κόσμε', |
|
2161
|
|
|
* 'age' => '18', |
|
2162
|
|
|
* 'email' => 'foo@example.com' |
|
2163
|
|
|
* ]; |
|
2164
|
|
|
* |
|
2165
|
|
|
* UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'foo@example.com'] |
|
2166
|
|
|
* </code> |
|
2167
|
|
|
* |
|
2168
|
|
|
* @see http://php.net/manual/en/function.filter-var-array.php |
|
2169
|
|
|
* |
|
2170
|
|
|
* @param array<mixed> $data <p> |
|
2171
|
|
|
* An array with string keys containing the data to filter. |
|
2172
|
|
|
* </p> |
|
2173
|
|
|
* @param array|int|null $definition [optional] <p> |
|
2174
|
|
|
* An array defining the arguments. A valid key is a string |
|
2175
|
|
|
* containing a variable name and a valid value is either a |
|
2176
|
|
|
* filter type, or an |
|
2177
|
|
|
* array optionally specifying the filter, flags and options. |
|
2178
|
|
|
* If the value is an array, valid keys are filter |
|
2179
|
|
|
* which specifies the filter type, |
|
2180
|
|
|
* flags which specifies any flags that apply to the |
|
2181
|
|
|
* filter, and options which specifies any options that |
|
2182
|
|
|
* apply to the filter. See the example below for a better understanding. |
|
2183
|
|
|
* </p> |
|
2184
|
|
|
* <p> |
|
2185
|
|
|
* This parameter can be also an integer holding a filter constant. Then all values |
|
2186
|
|
|
* in the input array are filtered by this filter. |
|
2187
|
|
|
* </p> |
|
2188
|
|
|
* @param bool $add_empty [optional] <p> |
|
2189
|
|
|
* Add missing keys as <b>NULL</b> to the return value. |
|
2190
|
|
|
* </p> |
|
2191
|
|
|
* |
|
2192
|
|
|
* @psalm-pure |
|
2193
|
|
|
* |
|
2194
|
|
|
* @return mixed |
|
2195
|
|
|
* <p> |
|
2196
|
|
|
* An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. |
|
2197
|
|
|
* An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not |
|
2198
|
|
|
* set. |
|
2199
|
|
|
* </p> |
|
2200
|
|
|
*/ |
|
2201
|
2 |
|
public static function filter_var_array(
    array $data,
    $definition = null,
    bool $add_empty = true
) {
    /**
     * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
     */
    if ($definition === null || \func_num_args() < 2) {
        // No usable definition: let the native function apply its default
        // filter to every value. An explicit null must not be forwarded,
        // because with strict_types on PHP 8 the native "array|int $options"
        // parameter rejects null with a TypeError. This mirrors
        // filter_input_array().
        $a = \filter_var_array($data);
    } else {
        $a = \filter_var_array($data, $definition, $add_empty);
    }

    // Normalize whatever the native filter produced.
    return self::filter($a);
}
|
2217
|
|
|
|
|
2218
|
|
|
/** |
|
2219
|
|
|
* Checks whether finfo is available on the server. |
|
2220
|
|
|
* |
|
2221
|
|
|
* @psalm-pure |
|
2222
|
|
|
* |
|
2223
|
|
|
* @return bool |
|
2224
|
|
|
* <p><strong>true</strong> if available, <strong>false</strong> otherwise</p> |
|
2225
|
|
|
* |
|
2226
|
|
|
* @internal <p>Please do not use it anymore, we will make it private in the next major version.</p> |
|
2227
|
|
|
*/ |
|
2228
|
|
|
public static function finfo_loaded(): bool
{
    // Availability is decided solely by whether the "finfo" class exists.
    $is_available = \class_exists('finfo');

    return $is_available;
}
|
2232
|
|
|
|
|
2233
|
|
|
/** |
|
2234
|
|
|
* Returns the first $n characters of the string. |
|
2235
|
|
|
* |
|
2236
|
|
|
* @param string $str <p>The input string.</p> |
|
2237
|
|
|
* @param int $n <p>Number of characters to retrieve from the start.</p> |
|
2238
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
|
2239
|
|
|
* |
|
2240
|
|
|
* @psalm-pure |
|
2241
|
|
|
* |
|
2242
|
|
|
* @return string |
|
2243
|
|
|
*/ |
|
2244
|
13 |
|
public static function first_char(
    string $str,
    int $n = 1,
    string $encoding = 'UTF-8'
): string {
    // Nothing to return for a non-positive count or an empty input.
    if ($n <= 0 || $str === '') {
        return '';
    }

    // UTF-8 goes straight to mbstring; any other encoding is delegated
    // to the class's own substr() implementation.
    $head = $encoding === 'UTF-8'
        ? \mb_substr($str, 0, $n)
        : self::substr($str, 0, $n, $encoding);

    return (string) $head;
}
|
2259
|
|
|
|
|
2260
|
|
|
/** |
|
2261
|
|
|
* Check if the number of Unicode characters isn't greater than the specified integer. |
|
2262
|
|
|
* |
|
2263
|
|
|
* EXAMPLE: <code>UTF8::fits_inside('κόσμε', 6); // true</code> |
|
2264
|
|
|
* |
|
2265
|
|
|
* @param string $str the original string to be checked |
|
2266
|
|
|
* @param int $box_size the size in number of chars to be checked against string |
|
2267
|
|
|
* |
|
2268
|
|
|
* @psalm-pure |
|
2269
|
|
|
* |
|
2270
|
|
|
* @return bool |
|
2271
|
|
|
* <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p> |
|
2272
|
|
|
*/ |
|
2273
|
2 |
|
public static function fits_inside(string $str, int $box_size): bool
{
    // Cast first: the length helper's result is forced to an integer
    // before it is compared with the box size.
    $char_count = (int) self::strlen($str);

    return $char_count <= $box_size;
}
|
2277
|
|
|
|
|
2278
|
|
|
/** |
|
2279
|
|
|
* Try to fix simple broken UTF-8 strings. |
|
2280
|
|
|
* |
|
2281
|
|
|
* INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings. |
|
2282
|
|
|
* |
|
2283
|
|
|
* EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code> |
|
2284
|
|
|
* |
|
2285
|
|
|
* If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1 |
|
2286
|
|
|
* (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it. |
|
2287
|
|
|
* See: http://en.wikipedia.org/wiki/Windows-1252 |
|
2288
|
|
|
* |
|
2289
|
|
|
* @param string $str <p>The input string</p> |
|
2290
|
|
|
* |
|
2291
|
|
|
* @psalm-pure |
|
2292
|
|
|
* |
|
2293
|
|
|
* @return string |
|
2294
|
|
|
*/ |
|
2295
|
46 |
|
public static function fix_simple_utf8(string $str): string
{
    if ($str === '') {
        return '';
    }

    /**
     * Search strings (the keys of the fix table), built once per request.
     *
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<mixed>|null
     */
    static $SEARCH_CACHE = null;

    /**
     * Replacement strings (the fix table itself), built once per request.
     *
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<mixed>|null
     */
    static $REPLACE_CACHE = null;

    if ($SEARCH_CACHE === null) {
        // Load the fix table on first use only.
        if (self::$BROKEN_UTF8_FIX === null) {
            self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
        }

        $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
        $SEARCH_CACHE = \array_keys(self::$BROKEN_UTF8_FIX ?: []);
    }

    \assert(\is_array($REPLACE_CACHE));

    return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
}
|
2328
|
|
|
|
|
2329
|
|
|
/** |
|
2330
|
|
|
* Fix a double (or multiple) encoded UTF8 string. |
|
2331
|
|
|
* |
|
2332
|
|
|
* EXAMPLE: <code>UTF8::fix_utf8('FÃ©dÃ©ration'); // 'Fédération'</code> |
|
2333
|
|
|
* |
|
2334
|
|
|
* @param string|string[] $str you can use a string or an array of strings |
|
2335
|
|
|
* |
|
2336
|
|
|
* @psalm-pure |
|
2337
|
|
|
* |
|
2338
|
|
|
* @return string|string[] |
|
2339
|
|
|
* <p>Will return the fixed input-"array" or |
|
2340
|
|
|
* the fixed input-"string".</p> |
|
2341
|
|
|
* |
|
2342
|
|
|
* @template TFixUtf8 |
|
2343
|
|
|
* @phpstan-param TFixUtf8 $str |
|
2344
|
|
|
* @phpstan-return TFixUtf8 |
|
2345
|
|
|
*/ |
|
2346
|
2 |
|
public static function fix_utf8($str)
{
    if (\is_array($str)) {
        // Fix every element recursively and keep the original keys.
        foreach ($str as $key => $value) {
            $str[$key] = self::fix_utf8($value);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }

    $current = (string) $str;
    $previous = '';

    // Decode and re-encode repeatedly until a pass no longer changes the
    // string. An empty input never enters the loop.
    while ($previous !== $current) {
        $previous = $current;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $current = self::to_utf8(
            self::utf8_decode($previous, true)
        );
    }

    /**
     * @psalm-suppress InvalidReturnStatement
     */
    return $current;
}
|
2377
|
|
|
|
|
2378
|
|
|
/** |
|
2379
|
|
|
* Get the text direction ('LTR' or 'RTL') of a specific character. |
|
2380
|
|
|
* |
|
2381
|
|
|
* EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code> |
|
2382
|
|
|
* |
|
2383
|
|
|
* @param string $char |
|
2384
|
|
|
* |
|
2385
|
|
|
* @psalm-pure |
|
2386
|
|
|
* |
|
2387
|
|
|
* @return string |
|
2388
|
|
|
* <p>'RTL' or 'LTR'.</p> |
|
2389
|
|
|
*/ |
|
2390
|
2 |
|
public static function getCharDirection(string $char): string
{
    if (self::$SUPPORT['intlChar'] === true) {
        $intl_direction = \IntlChar::charDirection($char);

        // from "IntlChar"-Class
        $codes_by_direction = [
            'LTR' => [0, 11, 12, 20],
            'RTL' => [1, 13, 14, 15, 21],
        ];

        foreach ($codes_by_direction as $direction => $codes) {
            if (\in_array($intl_direction, $codes, true)) {
                return $direction;
            }
        }
        // Any other result falls through to the manual lookup below.
    }

    $c = static::chr_to_decimal($char);

    // Everything outside this window is treated as left-to-right.
    if (!($c >= 0x5be && $c <= 0x10b7f)) {
        return 'LTR';
    }

    // Individual code points that are right-to-left.
    $rtl_single_code_points = [
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    ];

    if (\in_array($c, $rtl_single_code_points, true)) {
        return 'RTL';
    }

    // Inclusive [first, last] code point ranges that are right-to-left.
    $rtl_ranges = [
        [0x5d0, 0x5ea],
        [0x5f0, 0x5f4],
        [0x61e, 0x64a],
        [0x66d, 0x66f],
        [0x671, 0x6d5],
        [0x6e5, 0x6e6],
        [0x6ee, 0x6ef],
        [0x6fa, 0x70d],
        [0x712, 0x72f],
        [0x74d, 0x7a5],
        [0x7c0, 0x7ea],
        [0x7f4, 0x7f5],
        [0x800, 0x815],
        [0x830, 0x83e],
        [0x840, 0x858],
        [0xfb1f, 0xfb28],
        [0xfb2a, 0xfb36],
        [0xfb38, 0xfb3c],
        [0xfb40, 0xfb41],
        [0xfb43, 0xfb44],
        [0xfb46, 0xfbc1],
        [0xfbd3, 0xfd3d],
        [0xfd50, 0xfd8f],
        [0xfd92, 0xfdc7],
        [0xfdf0, 0xfdfc],
        [0xfe70, 0xfe74],
        [0xfe76, 0xfefc],
        [0x10800, 0x10805],
        [0x1080a, 0x10835],
        [0x10837, 0x10838],
        [0x1083f, 0x10855],
        [0x10857, 0x1085f],
        [0x10900, 0x1091b],
        [0x10920, 0x10939],
        [0x10a10, 0x10a13],
        [0x10a15, 0x10a17],
        [0x10a19, 0x10a33],
        [0x10a40, 0x10a47],
        [0x10a50, 0x10a58],
        [0x10a60, 0x10a7f],
        [0x10b00, 0x10b35],
        [0x10b40, 0x10b55],
        [0x10b58, 0x10b72],
        // upper bound comes from the window check above
        [0x10b78, 0x10b7f],
    ];

    foreach ($rtl_ranges as $range) {
        if ($c >= $range[0] && $c <= $range[1]) {
            return 'RTL';
        }
    }

    return 'LTR';
}
|
2495
|
|
|
|
|
2496
|
|
|
/**
 * Check for php-support: read the support information kept in self::$SUPPORT.
 *
 * @param string|null $key <p>Name of one support entry, or null to get all of them.</p>
 *
 * @psalm-pure
 *
 * @return mixed
 *               <p>The full support-"array", if $key === null,<br>
 *               the stored value (usually a bool), if $key is used and available,<br>
 *               otherwise <strong>null</strong>.</p>
 */
public static function getSupportInfo(string $key = null)
{
    if ($key !== null) {
        // the transliterator list is only loaded once, on the first keyed lookup
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // compatibility fix for old versions
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        return self::$SUPPORT[$key] ?? null;
    }

    return self::$SUPPORT;
}
|
2522
|
|
|
|
|
2523
|
|
|
/**
 * Guess the file type from the first two bytes of the given content.
 *
 * Warning: this method only works for some file-types (png, jpg),
 * if you need more supported types, please use e.g. "finfo".
 *
 * @param string $str      <p>The content to inspect.</p>
 * @param array  $fallback <p>Returned unchanged if the type is not recognized; with this keys: 'ext', 'mime', 'type'</p>
 *
 * @psalm-pure
 *
 * @return null[]|string[]
 *                         <p>with this keys: 'ext', 'mime', 'type'</p>
 *
 * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
 */
public static function get_file_type(
    string $str,
    array $fallback = [
        'ext'  => null,
        'mime' => 'application/octet-stream',
        'type' => null,
    ]
): array {
    if ($str === '') {
        return $fallback;
    }

    // a signature needs exactly two leading bytes
    /** @var false|string $head - needed for PhpStan (stubs error) */
    $head = \substr($str, 0, 2);
    if ($head === false || \strlen($head) !== 2) {
        return $fallback;
    }

    $bytes = \unpack('C2chars', $head);
    if ($bytes === false) {
        return $fallback;
    }

    // both byte values are glued together as decimal digits,
    // e.g. 0xff 0xd8 => "255" . "216" => 255216
    $type_code = (int) ($bytes['chars1'] . $bytes['chars2']);

    //
    // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
    //
    // WARNING: do not add too simple comparisons, because of false-positive results:
    //
    // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
    // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
    //
    $known_types = [
        255216 => ['ext' => 'jpg', 'mime' => 'image/jpeg', 'type' => 'binary'],
        13780  => ['ext' => 'png', 'mime' => 'image/png', 'type' => 'binary'],
    ];

    return $known_types[$type_code] ?? $fallback;
}
|
2599
|
|
|
|
|
2600
|
|
|
/**
 * Build a string of randomly picked characters.
 *
 * @param int    $length         <p>Length of the random string.</p>
 * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
 * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 *                <p>The random string, or an empty string if $possible_chars contains no characters.</p>
 */
public static function get_random_string(
    int $length,
    string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
    string $encoding = 'UTF-8'
): string {
    // the plain "mb_*" functions are only used if the caller passed exactly 'UTF-8',
    // every other value goes through the encoding-aware helpers of this class
    $use_plain_mb = $encoding === 'UTF-8';
    if (!$use_plain_mb) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $pool_size = $use_plain_mb
        ? (int) \mb_strlen($possible_chars)
        : (int) self::strlen($possible_chars, $encoding);
    if ($pool_size === 0) {
        return '';
    }

    //
    // add random chars
    //

    $result = '';
    $picked = 0;
    while ($picked < $length) {
        try {
            $offset = \random_int(0, $pool_size - 1);
        } catch (\Exception $e) {
            // no secure random source available
            $offset = \mt_rand(0, $pool_size - 1);
        }

        $char = $use_plain_mb
            ? \mb_substr($possible_chars, $offset, 1)
            : self::substr($possible_chars, $offset, 1, $encoding);
        if ($char === false) {
            // nothing was extracted, draw again
            continue;
        }

        $result .= $char;
        ++$picked;
    }

    return $result;
}
|
2662
|
|
|
|
|
2663
|
|
|
/**
 * Build a unique string from a random number, the session id, the request
 * addresses, the given extra entropy and uniqid().
 *
 * NOTE(review): built on uniqid() / md5() - presumably not meant as a security token; confirm before using it as one.
 *
 * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
 * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
 *
 * @return string
 */
public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
{
    try {
        $random_part = \random_int(0, \mt_getrandmax());
    } catch (\Exception $e) {
        // no secure random source available
        $random_part = \mt_rand(0, \mt_getrandmax());
    }

    $seed = (string) $random_part;
    $seed .= \session_id();
    $seed .= $_SERVER['REMOTE_ADDR'] ?? '';
    $seed .= $_SERVER['SERVER_ADDR'] ?? '';
    $seed .= $extra_entropy;

    $unique = \uniqid($seed, true);

    return $use_md5 ? \md5($unique . $seed) : $unique;
}
|
2691
|
|
|
|
|
2692
|
|
|
/**
 * Returns true if the string contains a lower case char, false otherwise.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not the string contains a lower case character.</p>
 */
public static function has_lowercase(string $str): bool
{
    $pattern = '.*[[:lower:]]';

    // use mbstring if it is available, otherwise the helper of this class
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
|
2710
|
|
|
|
|
2711
|
|
|
/**
 * Returns true if the string contains whitespace, false otherwise.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not the string contains whitespace.</p>
 */
public static function has_whitespace(string $str): bool
{
    $pattern = '.*[[:space:]]';

    // use mbstring if it is available, otherwise the helper of this class
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
|
2729
|
|
|
|
|
2730
|
|
|
/**
 * Returns true if the string contains an upper case char, false otherwise.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not the string contains an upper case character.</p>
 */
public static function has_uppercase(string $str): bool
{
    $pattern = '.*[[:upper:]]';

    // use mbstring if it is available, otherwise the helper of this class
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
|
2748
|
|
|
|
|
2749
|
|
|
/**
 * Converts a hexadecimal value into a UTF-8 character.
 *
 * INFO: opposite to UTF8::chr_to_hex()
 *
 * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
 *
 * @param string $hexdec <p>The hexadecimal value.</p>
 *
 * @psalm-pure
 *
 * @return false|string one single UTF-8 character
 */
public static function hex_to_chr(string $hexdec)
{
    // hexdec() skips characters that are not hexadecimal digits, the notice about that is silenced on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
    $code_point = (int) @\hexdec($hexdec);

    return self::decimal_to_chr($code_point);
}
|
2767
|
|
|
|
|
2768
|
|
|
/**
 * Converts hexadecimal U+xxxx code point representation to integer.
 *
 * INFO: opposite to UTF8::int_to_hex()
 *
 * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
 *
 * @param string $hexdec <p>The hexadecimal code point representation, with an optional "\u" or "U+" prefix.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The code point, or false on failure (empty input or not 4 to 6 hexadecimal digits).</p>
 */
public static function hex_to_int($hexdec)
{
    // init
    $hexdec = (string) $hexdec;

    if ($hexdec === '') {
        return false;
    }

    // Only real hexadecimal digits are accepted: intval() with base 16 stops at the
    // first invalid character, so e.g. "12zz" would otherwise be converted to 0x12
    // instead of being rejected.
    if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
        return \intval($match[1], 16);
    }

    return false;
}
|
2797
|
|
|
|
|
2798
|
|
|
/**
 * Converts a UTF-8 string to a series of HTML numbered entities.
 *
 * INFO: opposite to UTF8::html_decode()
 *
 * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
 *
 * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
 * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
 * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string HTML numbered entities
 */
public static function html_encode(
    string $str,
    bool $keep_ascii_chars = false,
    string $encoding = 'UTF-8'
): string {
    if ($str === '') {
        return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
        // convmap: [first code point, last code point, offset, bitmask];
        // starting at 0x80 leaves the ASCII chars untouched
        $convmap = [$keep_ascii_chars ? 0x80 : 0x00, 0xfffff, 0, 0xfffff];

        if ($encoding === 'UTF-8') {
            /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
            $encoded = \mb_encode_numericentity($str, $convmap);
            if ($encoded !== null && $encoded !== false) {
                return $encoded;
            }
        }

        /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
        $encoded = \mb_encode_numericentity($str, $convmap, $encoding);
        if ($encoded !== null && $encoded !== false) {
            return $encoded;
        }
    }

    //
    // fallback via vanilla php
    //

    $chars = self::str_split($str);
    $entities = \array_map(
        static function (string $chr) use ($keep_ascii_chars, $encoding): string {
            return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        },
        $chars
    );

    return \implode('', $entities);
}
|
2870
|
|
|
|
|
2871
|
|
|
/** |
|
2872
|
|
|
* UTF-8 version of html_entity_decode() |
|
2873
|
|
|
* |
|
2874
|
|
|
* The reason we are not using html_entity_decode() by itself is because |
|
2875
|
|
|
* while it is not technically correct to leave out the semicolon |
|
2876
|
|
|
* at the end of an entity most browsers will still interpret the entity |
|
2877
|
|
|
* correctly. html_entity_decode() does not convert entities without |
|
2878
|
|
|
* semicolons, so we are left with our own little solution here. Bummer. |
|
2879
|
|
|
* |
|
2880
|
|
|
* Convert all HTML entities to their applicable characters. |
|
2881
|
|
|
* |
|
2882
|
|
|
* INFO: opposite to UTF8::html_encode() |
|
2883
|
|
|
* |
|
2884
|
|
|
* EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code> |
|
2885
|
|
|
* |
|
2886
|
|
|
* @see http://php.net/manual/en/function.html-entity-decode.php |
|
2887
|
|
|
* |
|
2888
|
|
|
* @param string $str <p> |
|
2889
|
|
|
* The input string. |
|
2890
|
|
|
* </p> |
|
2891
|
|
|
* @param int|null $flags [optional] <p> |
|
2892
|
|
|
* A bitmask of one or more of the following flags, which specify how to handle quotes |
|
2893
|
|
|
* and which document type to use. The default is ENT_COMPAT | ENT_HTML401. |
|
2894
|
|
|
* <table> |
|
2895
|
|
|
* Available <i>flags</i> constants |
|
2896
|
|
|
* <tr valign="top"> |
|
2897
|
|
|
* <td>Constant Name</td> |
|
2898
|
|
|
* <td>Description</td> |
|
2899
|
|
|
* </tr> |
|
2900
|
|
|
* <tr valign="top"> |
|
2901
|
|
|
* <td><b>ENT_COMPAT</b></td> |
|
2902
|
|
|
* <td>Will convert double-quotes and leave single-quotes alone.</td> |
|
2903
|
|
|
* </tr> |
|
2904
|
|
|
* <tr valign="top"> |
|
2905
|
|
|
* <td><b>ENT_QUOTES</b></td> |
|
2906
|
|
|
* <td>Will convert both double and single quotes.</td> |
|
2907
|
|
|
* </tr> |
|
2908
|
|
|
* <tr valign="top"> |
|
2909
|
|
|
* <td><b>ENT_NOQUOTES</b></td> |
|
2910
|
|
|
* <td>Will leave both double and single quotes unconverted.</td> |
|
2911
|
|
|
* </tr> |
|
2912
|
|
|
* <tr valign="top"> |
|
2913
|
|
|
* <td><b>ENT_HTML401</b></td> |
|
2914
|
|
|
* <td> |
|
2915
|
|
|
* Handle code as HTML 4.01. |
|
2916
|
|
|
* </td> |
|
2917
|
|
|
* </tr> |
|
2918
|
|
|
* <tr valign="top"> |
|
2919
|
|
|
* <td><b>ENT_XML1</b></td> |
|
2920
|
|
|
* <td> |
|
2921
|
|
|
* Handle code as XML 1. |
|
2922
|
|
|
* </td> |
|
2923
|
|
|
* </tr> |
|
2924
|
|
|
* <tr valign="top"> |
|
2925
|
|
|
* <td><b>ENT_XHTML</b></td> |
|
2926
|
|
|
* <td> |
|
2927
|
|
|
* Handle code as XHTML. |
|
2928
|
|
|
* </td> |
|
2929
|
|
|
* </tr> |
|
2930
|
|
|
* <tr valign="top"> |
|
2931
|
|
|
* <td><b>ENT_HTML5</b></td> |
|
2932
|
|
|
* <td> |
|
2933
|
|
|
* Handle code as HTML 5. |
|
2934
|
|
|
* </td> |
|
2935
|
|
|
* </tr> |
|
2936
|
|
|
* </table> |
|
2937
|
|
|
* </p> |
|
2938
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
|
2939
|
|
|
* |
|
2940
|
|
|
* @psalm-pure |
|
2941
|
|
|
* |
|
2942
|
|
|
* @return string the decoded string |
|
2943
|
|
|
*/ |
|
2944
|
34 |
|
public static function html_entity_decode(
    string $str,
    int $flags = null,
    string $encoding = 'UTF-8'
): string {
    // nothing to decode: shorter than four bytes (examples: &; || &x;) or no "&" at all
    if (!isset($str[3]) || \strpos($str, '&') === false) {
        return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
        $flags = \ENT_QUOTES | \ENT_HTML5;
    }

    $is_other_encoding = $encoding !== 'UTF-8'
                         && $encoding !== 'ISO-8859-1'
                         && $encoding !== 'WINDOWS-1252';
    if ($is_other_encoding && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // decode again and again until the string does not change anymore,
    // so that multiple times encoded input is resolved completely
    do {
        $previous = $str;

        if (\strpos($str, '&') === false) {
            break;
        }

        if (\strpos($str, '&#') !== false) {
            // decode also numeric & UTF16 two byte entities:
            // add the missing ";" so that html_entity_decode() accepts them
            $str = (string) \preg_replace(
                '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                '$1;',
                $str
            );
        }

        $str = \html_entity_decode($str, $flags, $encoding);
    } while ($previous !== $str);

    return $str;
}
|
3003
|
|
|
|
|
3004
|
|
|
/**
 * Create a escape html version of the string via "UTF8::htmlspecialchars()",
 * using the flags ENT_QUOTES | ENT_SUBSTITUTE.
 *
 * @param string $str
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function html_escape(string $str, string $encoding = 'UTF-8'): string
{
    $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $flags, $encoding);
}
|
3022
|
|
|
|
|
3023
|
|
|
/**
 * Remove empty html-tag.
 *
 * e.g.: <pre><tag></tag></pre>
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 */
public static function html_stripe_empty_tags(string $str): string
{
    // an opening tag (without "/"), optional whitespace, directly followed by a closing tag
    $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

    $stripped = \preg_replace($empty_tag_pattern, '', $str);

    return (string) $stripped;
}
|
3042
|
|
|
|
|
3043
|
|
|
/** |
|
3044
|
|
|
* Convert all applicable characters to HTML entities: UTF-8 version of htmlentities(). |
|
3045
|
|
|
* |
|
3046
|
|
|
* EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code> |
|
3047
|
|
|
* |
|
3048
|
|
|
* @see http://php.net/manual/en/function.htmlentities.php |
|
3049
|
|
|
* |
|
3050
|
|
|
* @param string $str <p> |
|
3051
|
|
|
* The input string. |
|
3052
|
|
|
* </p> |
|
3053
|
|
|
* @param int $flags [optional] <p> |
|
3054
|
|
|
* A bitmask of one or more of the following flags, which specify how to handle |
|
3055
|
|
|
* quotes, invalid code unit sequences and the used document type. The default is |
|
3056
|
|
|
* ENT_COMPAT | ENT_HTML401. |
|
3057
|
|
|
* <table> |
|
3058
|
|
|
* Available <i>flags</i> constants |
|
3059
|
|
|
* <tr valign="top"> |
|
3060
|
|
|
* <td>Constant Name</td> |
|
3061
|
|
|
* <td>Description</td> |
|
3062
|
|
|
* </tr> |
|
3063
|
|
|
* <tr valign="top"> |
|
3064
|
|
|
* <td><b>ENT_COMPAT</b></td> |
|
3065
|
|
|
* <td>Will convert double-quotes and leave single-quotes alone.</td> |
|
3066
|
|
|
* </tr> |
|
3067
|
|
|
* <tr valign="top"> |
|
3068
|
|
|
* <td><b>ENT_QUOTES</b></td> |
|
3069
|
|
|
* <td>Will convert both double and single quotes.</td> |
|
3070
|
|
|
* </tr> |
|
3071
|
|
|
* <tr valign="top"> |
|
3072
|
|
|
* <td><b>ENT_NOQUOTES</b></td> |
|
3073
|
|
|
* <td>Will leave both double and single quotes unconverted.</td> |
|
3074
|
|
|
* </tr> |
|
3075
|
|
|
* <tr valign="top"> |
|
3076
|
|
|
* <td><b>ENT_IGNORE</b></td> |
|
3077
|
|
|
* <td> |
|
3078
|
|
|
* Silently discard invalid code unit sequences instead of returning |
|
3079
|
|
|
* an empty string. Using this flag is discouraged as it |
|
3080
|
|
|
* may have security implications. |
|
3081
|
|
|
* </td> |
|
3082
|
|
|
* </tr> |
|
3083
|
|
|
* <tr valign="top"> |
|
3084
|
|
|
* <td><b>ENT_SUBSTITUTE</b></td> |
|
3085
|
|
|
* <td> |
|
3086
|
|
|
* Replace invalid code unit sequences with a Unicode Replacement Character |
|
3087
|
|
|
* U+FFFD (UTF-8) or &#38;#FFFD; (otherwise) instead of returning an empty |
|
3088
|
|
|
* string. |
|
3089
|
|
|
* </td> |
|
3090
|
|
|
* </tr> |
|
3091
|
|
|
* <tr valign="top"> |
|
3092
|
|
|
* <td><b>ENT_DISALLOWED</b></td> |
|
3093
|
|
|
* <td> |
|
3094
|
|
|
* Replace invalid code points for the given document type with a |
|
3095
|
|
|
* Unicode Replacement Character U+FFFD (UTF-8) or &#38;#FFFD; |
|
3096
|
|
|
* (otherwise) instead of leaving them as is. This may be useful, for |
|
3097
|
|
|
* instance, to ensure the well-formedness of XML documents with |
|
3098
|
|
|
* embedded external content. |
|
3099
|
|
|
* </td> |
|
3100
|
|
|
* </tr> |
|
3101
|
|
|
* <tr valign="top"> |
|
3102
|
|
|
* <td><b>ENT_HTML401</b></td> |
|
3103
|
|
|
* <td> |
|
3104
|
|
|
* Handle code as HTML 4.01. |
|
3105
|
|
|
* </td> |
|
3106
|
|
|
* </tr> |
|
3107
|
|
|
* <tr valign="top"> |
|
3108
|
|
|
* <td><b>ENT_XML1</b></td> |
|
3109
|
|
|
* <td> |
|
3110
|
|
|
* Handle code as XML 1. |
|
3111
|
|
|
* </td> |
|
3112
|
|
|
* </tr> |
|
3113
|
|
|
* <tr valign="top"> |
|
3114
|
|
|
* <td><b>ENT_XHTML</b></td> |
|
3115
|
|
|
* <td> |
|
3116
|
|
|
* Handle code as XHTML. |
|
3117
|
|
|
* </td> |
|
3118
|
|
|
* </tr> |
|
3119
|
|
|
* <tr valign="top"> |
|
3120
|
|
|
* <td><b>ENT_HTML5</b></td> |
|
3121
|
|
|
* <td> |
|
3122
|
|
|
* Handle code as HTML 5. |
|
3123
|
|
|
* </td> |
|
3124
|
|
|
* </tr> |
|
3125
|
|
|
* </table> |
|
3126
|
|
|
* </p> |
|
3127
|
|
|
* @param string $encoding [optional] <p> |
|
3128
|
|
|
* Like <b>htmlspecialchars</b>, |
|
3129
|
|
|
* <b>htmlentities</b> takes an optional third argument |
|
3130
|
|
|
* <i>encoding</i> which defines encoding used in |
|
3131
|
|
|
* conversion. |
|
3132
|
|
|
* Although this argument is technically optional, you are highly |
|
3133
|
|
|
* encouraged to specify the correct value for your code. |
|
3134
|
|
|
* </p> |
|
3135
|
|
|
* @param bool $double_encode [optional] <p> |
|
3136
|
|
|
* When <i>double_encode</i> is turned off PHP will not |
|
3137
|
|
|
* encode existing html entities. The default is to convert everything. |
|
3138
|
|
|
* </p> |
|
3139
|
|
|
* |
|
3140
|
|
|
* @psalm-pure |
|
3141
|
|
|
* |
|
3142
|
|
|
* @return string |
|
3143
|
|
|
* <p> |
|
3144
|
|
|
* The encoded string. |
|
3145
|
|
|
* <br><br> |
|
3146
|
|
|
* If the input <i>string</i> contains an invalid code unit |
|
3147
|
|
|
* sequence within the given <i>encoding</i> an empty string |
|
3148
|
|
|
* will be returned, unless either the <b>ENT_IGNORE</b> or |
|
3149
|
|
|
* <b>ENT_SUBSTITUTE</b> flags are set. |
|
3150
|
|
|
* </p> |
|
3151
|
|
|
*/ |
|
3152
|
9 |
|
public static function htmlentities(
    string $str,
    int $flags = \ENT_COMPAT,
    string $encoding = 'UTF-8',
    bool $double_encode = true
): string {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = \htmlentities(
        $str,
        $flags,
        $encoding,
        $double_encode
    );

    /**
     * PHP doesn't replace a backslash to its html entity since this is something
     * that's mostly used to escape characters when inserting in a database. Since
     * we're using a decent database layer, we don't need that and we're replacing
     * the backslashes by their html entity equivalent ("&#92;").
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $str = \str_replace('\\', '&#92;', $str);

    // finally convert every remaining non-ASCII char into a numbered entity
    return self::html_encode($str, true, $encoding);
}
|
3181
|
|
|
|
|
3182
|
|
|
/** |
|
3183
|
|
|
* Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars() |
|
3184
|
|
|
* |
|
3185
|
|
|
* INFO: Take a look at "UTF8::htmlentities()" |
|
3186
|
|
|
* |
|
3187
|
|
|
* EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code> |
|
3188
|
|
|
* |
|
3189
|
|
|
* @see http://php.net/manual/en/function.htmlspecialchars.php |
|
3190
|
|
|
* |
|
3191
|
|
|
* @param string $str <p> |
|
3192
|
|
|
* The string being converted. |
|
3193
|
|
|
* </p> |
|
3194
|
|
|
* @param int $flags [optional] <p> |
|
3195
|
|
|
* A bitmask of one or more of the following flags, which specify how to handle |
|
3196
|
|
|
* quotes, invalid code unit sequences and the used document type. The default is |
|
3197
|
|
|
* ENT_COMPAT | ENT_HTML401. |
|
3198
|
|
|
* <table> |
|
3199
|
|
|
* Available <i>flags</i> constants |
|
3200
|
|
|
* <tr valign="top"> |
|
3201
|
|
|
* <td>Constant Name</td> |
|
3202
|
|
|
* <td>Description</td> |
|
3203
|
|
|
* </tr> |
|
3204
|
|
|
* <tr valign="top"> |
|
3205
|
|
|
* <td><b>ENT_COMPAT</b></td> |
|
3206
|
|
|
* <td>Will convert double-quotes and leave single-quotes alone.</td> |
|
3207
|
|
|
* </tr> |
|
3208
|
|
|
* <tr valign="top"> |
|
3209
|
|
|
* <td><b>ENT_QUOTES</b></td> |
|
3210
|
|
|
* <td>Will convert both double and single quotes.</td> |
|
3211
|
|
|
* </tr> |
|
3212
|
|
|
* <tr valign="top"> |
|
3213
|
|
|
* <td><b>ENT_NOQUOTES</b></td> |
|
3214
|
|
|
* <td>Will leave both double and single quotes unconverted.</td> |
|
3215
|
|
|
* </tr> |
|
3216
|
|
|
* <tr valign="top"> |
|
3217
|
|
|
* <td><b>ENT_IGNORE</b></td> |
|
3218
|
|
|
* <td> |
|
3219
|
|
|
* Silently discard invalid code unit sequences instead of returning |
|
3220
|
|
|
* an empty string. Using this flag is discouraged as it |
|
3221
|
|
|
* may have security implications. |
|
3222
|
|
|
* </td> |
|
3223
|
|
|
* </tr> |
|
3224
|
|
|
* <tr valign="top"> |
|
3225
|
|
|
* <td><b>ENT_SUBSTITUTE</b></td> |
|
3226
|
|
|
* <td> |
|
3227
|
|
|
* Replace invalid code unit sequences with a Unicode Replacement Character |
|
3228
|
|
|
* U+FFFD (UTF-8) or &#38;#FFFD; (otherwise) instead of returning an empty |
|
3229
|
|
|
* string. |
|
3230
|
|
|
* </td> |
|
3231
|
|
|
* </tr> |
|
3232
|
|
|
* <tr valign="top"> |
|
3233
|
|
|
* <td><b>ENT_DISALLOWED</b></td> |
|
3234
|
|
|
* <td> |
|
3235
|
|
|
* Replace invalid code points for the given document type with a |
|
3236
|
|
|
* Unicode Replacement Character U+FFFD (UTF-8) or &#38;#FFFD; |
|
3237
|
|
|
* (otherwise) instead of leaving them as is. This may be useful, for |
|
3238
|
|
|
* instance, to ensure the well-formedness of XML documents with |
|
3239
|
|
|
* embedded external content. |
|
3240
|
|
|
* </td> |
|
3241
|
|
|
* </tr> |
|
3242
|
|
|
* <tr valign="top"> |
|
3243
|
|
|
* <td><b>ENT_HTML401</b></td> |
|
3244
|
|
|
* <td> |
|
3245
|
|
|
* Handle code as HTML 4.01. |
|
3246
|
|
|
* </td> |
|
3247
|
|
|
* </tr> |
|
3248
|
|
|
* <tr valign="top"> |
|
3249
|
|
|
* <td><b>ENT_XML1</b></td> |
|
3250
|
|
|
* <td> |
|
3251
|
|
|
* Handle code as XML 1. |
|
3252
|
|
|
* </td> |
|
3253
|
|
|
* </tr> |
|
3254
|
|
|
* <tr valign="top"> |
|
3255
|
|
|
* <td><b>ENT_XHTML</b></td> |
|
3256
|
|
|
* <td> |
|
3257
|
|
|
* Handle code as XHTML. |
|
3258
|
|
|
* </td> |
|
3259
|
|
|
* </tr> |
|
3260
|
|
|
* <tr valign="top"> |
|
3261
|
|
|
* <td><b>ENT_HTML5</b></td> |
|
3262
|
|
|
* <td> |
|
3263
|
|
|
* Handle code as HTML 5. |
|
3264
|
|
|
* </td> |
|
3265
|
|
|
* </tr> |
|
3266
|
|
|
* </table> |
|
3267
|
|
|
* </p> |
|
3268
|
|
|
* @param string $encoding [optional] <p> |
|
3269
|
|
|
* Defines encoding used in conversion. |
|
3270
|
|
|
* </p> |
|
3271
|
|
|
* <p> |
|
3272
|
|
|
* For the purposes of this function, the encodings |
|
3273
|
|
|
* ISO-8859-1, ISO-8859-15, |
|
3274
|
|
|
* UTF-8, cp866, |
|
3275
|
|
|
* cp1251, cp1252, and |
|
3276
|
|
|
* KOI8-R are effectively equivalent, provided the |
|
3277
|
|
|
* <i>string</i> itself is valid for the encoding, as |
|
3278
|
|
|
* the characters affected by <b>htmlspecialchars</b> occupy |
|
3279
|
|
|
* the same positions in all of these encodings. |
|
3280
|
|
|
* </p> |
|
3281
|
|
|
* @param bool $double_encode [optional] <p> |
|
3282
|
|
|
* When <i>double_encode</i> is turned off PHP will not |
|
3283
|
|
|
* encode existing html entities, the default is to convert everything. |
|
3284
|
|
|
* </p> |
|
3285
|
|
|
* |
|
3286
|
|
|
* @psalm-pure |
|
3287
|
|
|
* |
|
3288
|
|
|
* @return string the converted string. |
|
3289
|
|
|
* </p> |
|
3290
|
|
|
* <p> |
|
3291
|
|
|
* If the input <i>string</i> contains an invalid code unit |
|
3292
|
|
|
* sequence within the given <i>encoding</i> an empty string |
|
3293
|
|
|
* will be returned, unless either the <b>ENT_IGNORE</b> or |
|
3294
|
|
|
* <b>ENT_SUBSTITUTE</b> flags are set |
|
3295
|
|
|
*/ |
|
3296
|
8 |
|
public static function htmlspecialchars(
    string $str,
    int $flags = \ENT_COMPAT,
    string $encoding = 'UTF-8',
    bool $double_encode = true
): string {
    // 'UTF-8' and 'CP850' are passed through as they are, everything else gets normalized first
    $needs_normalization = $encoding !== 'UTF-8' && $encoding !== 'CP850';
    if ($needs_normalization) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
}
|
3313
|
|
|
|
|
3314
|
|
|
/** |
|
3315
|
|
|
* Checks whether iconv is available on the server. |
|
3316
|
|
|
* |
|
3317
|
|
|
* @psalm-pure |
|
3318
|
|
|
* |
|
3319
|
|
|
* @return bool |
|
3320
|
|
|
* <p><strong>true</strong> if available, <strong>false</strong> otherwise</p> |
|
3321
|
|
|
* |
|
3322
|
|
|
* @internal <p>Please do not use it anymore, we will make is private in next major version.</p> |
|
3323
|
|
|
*/ |
|
3324
|
|
|
public static function iconv_loaded(): bool
{
    // Name of the PHP extension whose presence is reported.
    $extension = 'iconv';

    return \extension_loaded($extension);
}
|
3328
|
|
|
|
|
3329
|
|
|
/** |
|
3330
|
|
|
* Converts Integer to hexadecimal U+xxxx code point representation. |
|
3331
|
|
|
* |
|
3332
|
|
|
* INFO: opposite to UTF8::hex_to_int() |
|
3333
|
|
|
* |
|
3334
|
|
|
* EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code> |
|
3335
|
|
|
* |
|
3336
|
|
|
* @param int $int <p>The integer to be converted to hexadecimal code point.</p> |
|
3337
|
|
|
* @param string $prefix [optional] |
|
3338
|
|
|
* |
|
3339
|
|
|
* @psalm-pure |
|
3340
|
|
|
* |
|
3341
|
|
|
* @return string the code point, or empty string on failure |
|
3342
|
|
|
*/ |
|
3343
|
6 |
|
public static function int_to_hex(int $int, string $prefix = 'U+'): string
{
    // Hexadecimal digits of the value, left-padded with zeros to at least
    // four characters; longer values are kept as they are.
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $prefix . $digits;
}
|
3351
|
|
|
|
|
3352
|
|
|
/** |
|
3353
|
|
|
* Checks whether intl-char is available on the server. |
|
3354
|
|
|
* |
|
3355
|
|
|
* @psalm-pure |
|
3356
|
|
|
* |
|
3357
|
|
|
* @return bool |
|
3358
|
|
|
* <p><strong>true</strong> if available, <strong>false</strong> otherwise</p> |
|
3359
|
|
|
* |
|
3360
|
|
|
* @internal <p>Please do not use it anymore, we will make it private in next major version.</p> |
|
3361
|
|
|
*/ |
|
3362
|
|
|
public static function intlChar_loaded(): bool
{
    // The check is based on the existence of the "IntlChar" class.
    $class_name = 'IntlChar';

    return \class_exists($class_name);
}
|
3366
|
|
|
|
|
3367
|
|
|
/** |
|
3368
|
|
|
* Checks whether intl is available on the server. |
|
3369
|
|
|
* |
|
3370
|
|
|
* @psalm-pure |
|
3371
|
|
|
* |
|
3372
|
|
|
* @return bool |
|
3373
|
|
|
* <p><strong>true</strong> if available, <strong>false</strong> otherwise</p> |
|
3374
|
|
|
* |
|
3375
|
|
|
* @internal <p>Please do not use it anymore, we will make it private in next major version.</p> |
|
3376
|
|
|
*/ |
|
3377
|
5 |
|
public static function intl_loaded(): bool
{
    // Name of the PHP extension whose presence is reported.
    $extension = 'intl';

    return \extension_loaded($extension);
}
|
3381
|
|
|
|
|
3382
|
|
|
/** |
|
3383
|
|
|
* Returns true if the string contains only alphabetic chars, false otherwise. |
|
3384
|
|
|
* |
|
3385
|
|
|
* @param string $str <p>The input string.</p> |
|
3386
|
|
|
* |
|
3387
|
|
|
* @psalm-pure |
|
3388
|
|
|
* |
|
3389
|
|
|
* @return bool |
|
3390
|
|
|
* <p>Whether or not $str contains only alphabetic chars.</p> |
|
3391
|
|
|
*/ |
|
3392
|
10 |
|
public static function is_alpha(string $str): bool
{
    // Zero or more alphabetic characters and nothing else.
    $pattern = '^[[:alpha:]]*$';

    // Use mbstring's regex engine when it is flagged as available,
    // otherwise the class's own pattern matcher.
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
|
3400
|
|
|
|
|
3401
|
|
|
/** |
|
3402
|
|
|
* Returns true if the string contains only alphabetic and numeric chars, false otherwise. |
|
3403
|
|
|
* |
|
3404
|
|
|
* @param string $str <p>The input string.</p> |
|
3405
|
|
|
* |
|
3406
|
|
|
* @psalm-pure |
|
3407
|
|
|
* |
|
3408
|
|
|
* @return bool |
|
3409
|
|
|
* <p>Whether or not $str contains only alphanumeric chars.</p> |
|
3410
|
|
|
*/ |
|
3411
|
13 |
|
public static function is_alphanumeric(string $str): bool
{
    // Zero or more alphanumeric characters and nothing else.
    $pattern = '^[[:alnum:]]*$';

    // Use mbstring's regex engine when it is flagged as available,
    // otherwise the class's own pattern matcher.
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
|
3419
|
|
|
|
|
3420
|
|
|
/** |
|
3421
|
|
|
* Returns true if the string contains only punctuation chars, false otherwise. |
|
3422
|
|
|
* |
|
3423
|
|
|
* @param string $str <p>The input string.</p> |
|
3424
|
|
|
* |
|
3425
|
|
|
* @psalm-pure |
|
3426
|
|
|
* |
|
3427
|
|
|
* @return bool |
|
3428
|
|
|
* <p>Whether or not $str contains only punctuation chars.</p> |
|
3429
|
|
|
*/ |
|
3430
|
10 |
|
public static function is_punctuation(string $str): bool
{
    // Zero or more punctuation characters and nothing else.
    $pattern = '^[[:punct:]]*$';

    return self::str_matches_pattern($str, $pattern);
}
|
3434
|
|
|
|
|
3435
|
|
|
/** |
|
3436
|
|
|
* Returns true if the string contains only printable (non-invisible) chars, false otherwise. |
|
3437
|
|
|
* |
|
3438
|
|
|
* @param string $str <p>The input string.</p> |
|
3439
|
|
|
* @param bool $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p> |
|
3440
|
|
|
* |
|
3441
|
|
|
* @psalm-pure |
|
3442
|
|
|
* |
|
3443
|
|
|
* @return bool |
|
3444
|
|
|
* <p>Whether or not $str contains only printable (non-invisible) chars.</p> |
|
3445
|
|
|
*/ |
|
3446
|
1 |
|
public static function is_printable(string $str, bool $ignore_control_characters = false): bool
{
    // The string is printable when removing invisible characters leaves it unchanged.
    $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

    return $cleaned === $str;
}
|
3450
|
|
|
|
|
3451
|
|
|
/** |
|
3452
|
|
|
* Checks if a string is 7 bit ASCII. |
|
3453
|
|
|
* |
|
3454
|
|
|
* EXAMPLE: <code>UTF8::is_ascii('白'); // false</code> |
|
3455
|
|
|
* |
|
3456
|
|
|
* @param string $str <p>The string to check.</p> |
|
3457
|
|
|
* |
|
3458
|
|
|
* @psalm-pure |
|
3459
|
|
|
* |
|
3460
|
|
|
* @return bool |
|
3461
|
|
|
* <p> |
|
3462
|
|
|
* <strong>true</strong> if it is ASCII<br> |
|
3463
|
|
|
* <strong>false</strong> otherwise |
|
3464
|
|
|
* </p> |
|
3465
|
|
|
*/ |
|
3466
|
8 |
|
public static function is_ascii(string $str): bool
{
    // The check is delegated to the ASCII helper class.
    $only_ascii = ASCII::is_ascii($str);

    return $only_ascii;
}
|
3470
|
|
|
|
|
3471
|
|
|
/** |
|
3472
|
|
|
* Returns true if the string is base64 encoded, false otherwise. |
|
3473
|
|
|
* |
|
3474
|
|
|
* EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code> |
|
3475
|
|
|
* |
|
3476
|
|
|
* @param string|null $str <p>The input string.</p> |
|
3477
|
|
|
* @param bool $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p> |
|
3478
|
|
|
* |
|
3479
|
|
|
* @psalm-pure |
|
3480
|
|
|
* |
|
3481
|
|
|
* @return bool |
|
3482
|
|
|
* <p>Whether or not $str is base64 encoded.</p> |
|
3483
|
|
|
*/ |
|
3484
|
16 |
|
public static function is_base64($str, bool $empty_string_is_valid = false): bool
{
    // Anything that is not a string (e.g. null) is never base64.
    if (!\is_string($str)) {
        return false;
    }

    // The empty string only counts when the caller explicitly allows it.
    if ($str === '' && !$empty_string_is_valid) {
        return false;
    }

    // Strict decoding rejects characters outside the base64 alphabet.
    $decoded = \base64_decode($str, true);
    if ($decoded === false) {
        return false;
    }

    // Re-encoding must reproduce the input exactly.
    return \base64_encode($decoded) === $str;
}
|
3502
|
|
|
|
|
3503
|
|
|
/** |
|
3504
|
|
|
* Check if the input is binary... (this looks like a hack). |
|
3505
|
|
|
* |
|
3506
|
|
|
* EXAMPLE: <code>UTF8::is_binary(01); // true</code> |
|
3507
|
|
|
* |
|
3508
|
|
|
* @param int|string $input |
|
3509
|
|
|
* @param bool $strict |
|
3510
|
|
|
* |
|
3511
|
|
|
* @psalm-pure |
|
3512
|
|
|
* |
|
3513
|
|
|
* @return bool |
|
3514
|
|
|
*/ |
|
3515
|
39 |
|
public static function is_binary($input, bool $strict = false): bool
{
    $input = (string) $input;
    if ($input === '') {
        return false;
    }

    // A string made up solely of the digits "0" and "1" counts as binary.
    if (\preg_match('~^[01]+$~', $input)) {
        return true;
    }

    // Ask the file-type detection whether it classifies the data as binary.
    $file_type = self::get_file_type($input);
    if ($file_type['type'] === 'binary') {
        return true;
    }

    if ($strict) {
        // Strict mode relies on ext-fileinfo's MIME encoding detection.
        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $detected_encoding === 'binary';
    }

    // Non-strict heuristic: more than a quarter of the bytes being NUL
    // is taken as a sign of binary data.
    $byte_length = \strlen($input);
    $null_byte_count = \substr_count($input, "\x0", 0, $byte_length);

    return ($null_byte_count / $byte_length) > 0.25;
}
|
3555
|
|
|
|
|
3556
|
|
|
/** |
|
3557
|
|
|
* Check if the file is binary. |
|
3558
|
|
|
* |
|
3559
|
|
|
* EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code> |
|
3560
|
|
|
* |
|
3561
|
|
|
* @param string $file |
|
3562
|
|
|
* |
|
3563
|
|
|
* @return bool |
|
3564
|
|
|
*/ |
|
3565
|
6 |
|
public static function is_binary_file($file): bool
{
    $handle = \fopen($file, 'rb');
    if (!\is_resource($handle)) {
        // A file that cannot be opened is reported as "not binary".
        return false;
    }

    // Only the first 512 bytes of the file are inspected.
    $head = \fread($handle, 512);
    \fclose($handle);

    // Nothing could be read (empty file or read failure).
    if ($head === false || $head === '') {
        return false;
    }

    return self::is_binary($head, true);
}
|
3582
|
|
|
|
|
3583
|
|
|
/** |
|
3584
|
|
|
* Returns true if the string contains only whitespace chars, false otherwise. |
|
3585
|
|
|
* |
|
3586
|
|
|
* @param string $str <p>The input string.</p> |
|
3587
|
|
|
* |
|
3588
|
|
|
* @psalm-pure |
|
3589
|
|
|
* |
|
3590
|
|
|
* @return bool |
|
3591
|
|
|
* <p>Whether or not $str contains only whitespace characters.</p> |
|
3592
|
|
|
*/ |
|
3593
|
15 |
|
public static function is_blank(string $str): bool
{
    // Zero or more whitespace characters and nothing else.
    $pattern = '^[[:space:]]*$';

    // Use mbstring's regex engine when it is flagged as available,
    // otherwise the class's own pattern matcher.
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
|
3601
|
|
|
|
|
3602
|
|
|
/** |
|
3603
|
|
|
* Checks if the given string is equal to any "Byte Order Mark". |
|
3604
|
|
|
* |
|
3605
|
|
|
* WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string. |
|
3606
|
|
|
* |
|
3607
|
|
|
* EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code> |
|
3608
|
|
|
* |
|
3609
|
|
|
* @param string $str <p>The input string.</p> |
|
3610
|
|
|
* |
|
3611
|
|
|
* @psalm-pure |
|
3612
|
|
|
* |
|
3613
|
|
|
* @return bool |
|
3614
|
|
|
* <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p> |
|
3615
|
|
|
*/ |
|
3616
|
2 |
|
public static function is_bom($str): bool
{
    // The BOM table is keyed by the BOM byte sequence itself, so a direct key
    // lookup answers the question without iterating (and without taking
    // references into the shared static table).
    // Only strings can be equal to a key; any other type is reported as
    // "not a BOM", matching a strict comparison against every key.
    return \is_string($str) && \array_key_exists($str, self::$BOM);
}
|
3627
|
|
|
|
|
3628
|
|
|
/** |
|
3629
|
|
|
* Determine whether the string is considered to be empty. |
|
3630
|
|
|
* |
|
3631
|
|
|
* A variable is considered empty if it does not exist or if its value equals FALSE. |
|
3632
|
|
|
* empty() does not generate a warning if the variable does not exist. |
|
3633
|
|
|
* |
|
3634
|
|
|
* @param array|float|int|string $str |
|
3635
|
|
|
* |
|
3636
|
|
|
* @psalm-pure |
|
3637
|
|
|
* |
|
3638
|
|
|
* @return bool |
|
3639
|
|
|
* <p>Whether or not $str is empty().</p> |
|
3640
|
|
|
*/ |
|
3641
|
1 |
|
public static function is_empty($str): bool
{
    // For a variable that is always defined (as a parameter is), PHP's
    // empty() is the same as a negated boolean cast.
    return !$str;
}
|
3645
|
|
|
|
|
3646
|
|
|
/** |
|
3647
|
|
|
* Returns true if the string contains only hexadecimal chars, false otherwise. |
|
3648
|
|
|
* |
|
3649
|
|
|
* @param string $str <p>The input string.</p> |
|
3650
|
|
|
* |
|
3651
|
|
|
* @psalm-pure |
|
3652
|
|
|
* |
|
3653
|
|
|
* @return bool |
|
3654
|
|
|
* <p>Whether or not $str contains only hexadecimal chars.</p> |
|
3655
|
|
|
*/ |
|
3656
|
13 |
|
public static function is_hexadecimal(string $str): bool
{
    // Zero or more hexadecimal digits and nothing else.
    $pattern = '^[[:xdigit:]]*$';

    // Use mbstring's regex engine when it is flagged as available,
    // otherwise the class's own pattern matcher.
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
|
3664
|
|
|
|
|
3665
|
|
|
/** |
|
3666
|
|
|
* Check if the string contains any HTML tags. |
|
3667
|
|
|
* |
|
3668
|
|
|
* EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code> |
|
3669
|
|
|
* |
|
3670
|
|
|
* @param string $str <p>The input string.</p> |
|
3671
|
|
|
* |
|
3672
|
|
|
* @psalm-pure |
|
3673
|
|
|
* |
|
3674
|
|
|
* @return bool |
|
3675
|
|
|
* <p>Whether or not $str contains html elements.</p> |
|
3676
|
|
|
*/ |
|
3677
|
3 |
|
public static function is_html(string $str): bool
{
    if ($str === '') {
        return false;
    }

    // hack for emoji support :/
    $encoded = self::emoji_encode($str);

    // Looks for one opening, closing or self-closing tag, optionally with
    // attributes (quoted or unquoted values).
    $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

    $found = [];
    \preg_match($tag_pattern, $encoded, $found);

    // A non-empty match array means at least one tag was found.
    return $found !== [];
}
|
3692
|
|
|
|
|
3693
|
|
|
/** |
|
3694
|
|
|
* Check if $url is a correct URL. |
|
3695
|
|
|
* |
|
3696
|
|
|
* @param string $url |
|
3697
|
|
|
* @param bool $disallow_localhost |
|
3698
|
|
|
* |
|
3699
|
|
|
* @psalm-pure |
|
3700
|
|
|
* |
|
3701
|
|
|
* @return bool |
|
3702
|
|
|
*/ |
|
3703
|
1 |
|
public static function is_url(string $url, bool $disallow_localhost = false): bool
{
    if ($url === '') {
        return false;
    }

    // WARNING: keep this as hack protection
    // (only "http://" and "https://" URLs are ever accepted)
    if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
        return false;
    }

    // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
    if ($disallow_localhost) {
        if (self::str_istarts_with_any(
            $url,
            [
                'http://localhost',
                'https://localhost',
                'http://127.0.0.1',
                'https://127.0.0.1',
                'http://::1',
                'https://::1',
                // Inside a URL an IPv6 literal is written in square brackets
                // (RFC 3986, "IP-literal"). Without these entries the IPv6
                // loopback address would slip through to filter_var() below,
                // which accepts it.
                'http://[::1]',
                'https://[::1]',
            ]
        )) {
            return false;
        }

        // Reject anything containing ".localhost", e.g. "https://foo.localhost/".
        $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
        if (\preg_match($regex, $url)) {
            return false;
        }
    }

    // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
    $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
    if (\preg_match($regex, $url)) {
        return true;
    }

    // Everything else is left to PHP's own URL validation.
    return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
}
|
3744
|
|
|
|
|
3745
|
|
|
/** |
|
3746
|
|
|
* Try to check if "$str" is a JSON-string. |
|
3747
|
|
|
* |
|
3748
|
|
|
* EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code> |
|
3749
|
|
|
* |
|
3750
|
|
|
* @param string $str <p>The input string.</p> |
|
3751
|
|
|
* @param bool $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json |
|
3752
|
|
|
* results.</p> |
|
3753
|
|
|
* |
|
3754
|
|
|
* @return bool |
|
3755
|
|
|
* <p>Whether or not the $str is in JSON format.</p> |
|
3756
|
|
|
*/ |
|
3757
|
42 |
|
public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
{
    if ($str === '') {
        return false;
    }

    if (self::$SUPPORT['json'] === false) {
        throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // A null result is only accepted when the input itself is the literal
    // "null" (in any letter case).
    $is_null_literal = \strtoupper($str) === 'NULL';
    if ($decoded === null && !$is_null_literal) {
        return false;
    }

    // Optionally reject scalar results: only arrays and objects pass.
    if ($only_array_or_object_results_are_valid) {
        $is_container = \is_object($decoded) || \is_array($decoded);
        if (!$is_container) {
            return false;
        }
    }

    // Finally, the decoder must not have recorded an error.
    return \json_last_error() === \JSON_ERROR_NONE;
}
|
3784
|
|
|
|
|
3785
|
|
|
/** |
|
3786
|
|
|
* @param string $str <p>The input string.</p> |
|
3787
|
|
|
* |
|
3788
|
|
|
* @psalm-pure |
|
3789
|
|
|
* |
|
3790
|
|
|
* @return bool |
|
3791
|
|
|
* <p>Whether or not $str contains only lowercase chars.</p> |
|
3792
|
|
|
*/ |
|
3793
|
8 |
|
public static function is_lowercase(string $str): bool
{
    // Zero or more lowercase characters and nothing else.
    $pattern = '^[[:lower:]]*$';

    // Use mbstring's regex engine when it is flagged as available,
    // otherwise the class's own pattern matcher.
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
|
3801
|
|
|
|
|
3802
|
|
|
/** |
|
3803
|
|
|
* Returns true if the string is serialized, false otherwise. |
|
3804
|
|
|
* |
|
3805
|
|
|
* @param string $str <p>The input string.</p> |
|
3806
|
|
|
* |
|
3807
|
|
|
* @psalm-pure |
|
3808
|
|
|
* |
|
3809
|
|
|
* @return bool |
|
3810
|
|
|
* <p>Whether or not $str is serialized.</p> |
|
3811
|
|
|
*/ |
|
3812
|
7 |
|
public static function is_serialized(string $str): bool
{
    if ($str === '') {
        return false;
    }

    // "b:0;" is the serialized form of boolean false, which would be
    // indistinguishable from the false that unserialize() returns on failure.
    if ($str === 'b:0;') {
        return true;
    }

    // Only the *format* is of interest here, so object instantiation is
    // switched off: with 'allowed_classes' => false serialized objects become
    // __PHP_Incomplete_Class instances instead of real objects. That still
    // yields a non-false result, but merely testing an untrusted string can
    // no longer create instances of arbitrary classes.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
}
|
3824
|
|
|
|
|
3825
|
|
|
/** |
|
3826
|
|
|
* Returns true if the string contains only upper case chars, false |
|
3827
|
|
|
* otherwise. |
|
3828
|
|
|
* |
|
3829
|
|
|
* @param string $str <p>The input string.</p> |
|
3830
|
|
|
* |
|
3831
|
|
|
* @psalm-pure |
|
3832
|
|
|
* |
|
3833
|
|
|
* @return bool |
|
3834
|
|
|
* <p>Whether or not $str contains only upper case characters.</p> |
|
3835
|
|
|
*/ |
|
3836
|
8 |
|
public static function is_uppercase(string $str): bool
{
    // Zero or more uppercase characters and nothing else.
    $pattern = '^[[:upper:]]*$';

    // Use mbstring's regex engine when it is flagged as available,
    // otherwise the class's own pattern matcher.
    return self::$SUPPORT['mbstring'] === true
        ? \mb_ereg_match($pattern, $str)
        : self::str_matches_pattern($str, $pattern);
}
|
3844
|
|
|
|
|
3845
|
|
|
/** |
|
3846
|
|
|
* Check if the string is UTF-16. |
|
3847
|
|
|
* |
|
3848
|
|
|
* EXAMPLE: <code> |
|
3849
|
|
|
* UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1 |
|
3850
|
|
|
* // |
|
3851
|
|
|
* UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2 |
|
3852
|
|
|
* // |
|
3853
|
|
|
* UTF8::is_utf16(file_get_contents('utf-8.txt')); // false |
|
3854
|
|
|
* </code> |
|
3855
|
|
|
* |
|
3856
|
|
|
* @param string $str <p>The input string.</p> |
|
3857
|
|
|
* @param bool $check_if_string_is_binary |
|
3858
|
|
|
* |
|
3859
|
|
|
* @psalm-pure |
|
3860
|
|
|
* |
|
3861
|
|
|
* @return false|int |
|
3862
|
|
|
* <strong>false</strong> if it's not UTF-16,<br> |
|
3863
|
|
|
* <strong>1</strong> for UTF-16LE,<br> |
|
3864
|
|
|
* <strong>2</strong> for UTF-16BE |
|
3865
|
|
|
*/ |
|
3866
|
21 |
|
public static function is_utf16($str, bool $check_if_string_is_binary = true)
{
    // Heuristic UTF-16 detection: the input is round-tripped through both
    // byte orders and a score is computed for each; the higher score wins.

    // init
    $str = (string) $str;
    // Filled lazily with self::count_chars($str, true, false) and shared by
    // both passes below.
    $str_chars = [];

    // fix for the "binary"-check
    // (a string that carries a BOM skips the binary check below)
    if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
        $check_if_string_is_binary = false;
    }

    // When the binary check is active, input that is not detected as binary
    // (strict mode) is rejected right away.
    if (
        $check_if_string_is_binary
        &&
        !self::is_binary($str, true)
    ) {
        return false;
    }

    // Without mbstring only a warning is raised; the conversions below are
    // still attempted.
    if (self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // --- little-endian pass ---
    $maybe_utf16le = 0;
    $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
    if ($test) {
        // Round trip: UTF-8 -> UTF-16LE -> UTF-8 must reproduce $test.
        $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
        $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
        if ($test3 === $test) {
            /**
             * @psalm-suppress RedundantCondition
             */
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }
            // Score: number of keys of self::count_chars($test3) that are
            // found (strict comparison) among the values of $str_chars.
            // NOTE(review): presumably the keys are the distinct characters
            // of the converted text - confirm against count_chars().
            foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$maybe_utf16le;
                }
            }
            // drop the reference left behind by the by-reference loop
            unset($test3charEmpty);
        }
    }

    // --- big-endian pass (same procedure with UTF-16BE) ---
    $maybe_utf16be = 0;
    $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
    if ($test) {
        $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
        $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
        if ($test3 === $test) {
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }
            foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$maybe_utf16be;
                }
            }
            // drop the reference left behind by the by-reference loop
            unset($test3charEmpty);
        }
    }

    // Different scores: 1 when little-endian scored higher, 2 otherwise.
    if ($maybe_utf16be !== $maybe_utf16le) {
        if ($maybe_utf16le > $maybe_utf16be) {
            return 1;
        }

        return 2;
    }

    // Equal scores (including both zero): not recognised as UTF-16.
    return false;
}
|
3943
|
|
|
|
|
3944
|
|
|
/** |
|
3945
|
|
|
* Check if the string is UTF-32. |
|
3946
|
|
|
* |
|
3947
|
|
|
* EXAMPLE: <code> |
|
3948
|
|
|
* UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1 |
|
3949
|
|
|
* // |
|
3950
|
|
|
* UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2 |
|
3951
|
|
|
* // |
|
3952
|
|
|
* UTF8::is_utf32(file_get_contents('utf-8.txt')); // false |
|
3953
|
|
|
* </code> |
|
3954
|
|
|
* |
|
3955
|
|
|
* @param string $str <p>The input string.</p> |
|
3956
|
|
|
* @param bool $check_if_string_is_binary |
|
3957
|
|
|
* |
|
3958
|
|
|
* @psalm-pure |
|
3959
|
|
|
* |
|
3960
|
|
|
* @return false|int |
|
3961
|
|
|
* <strong>false</strong> if it's not UTF-32,<br> |
|
3962
|
|
|
* <strong>1</strong> for UTF-32LE,<br> |
|
3963
|
|
|
* <strong>2</strong> for UTF-32BE |
|
3964
|
|
|
*/ |
|
3965
|
19 |
|
public static function is_utf32($str, bool $check_if_string_is_binary = true)
{
    // Heuristic UTF-32 detection: the input is round-tripped through both
    // byte orders and a score is computed for each; the higher score wins.

    // init
    $str = (string) $str;
    // Filled lazily with self::count_chars($str, true, false) and shared by
    // both passes below.
    $str_chars = [];

    // fix for the "binary"-check
    // (a string that carries a BOM skips the binary check below)
    if ($check_if_string_is_binary !== false && self::string_has_bom($str)) {
        $check_if_string_is_binary = false;
    }

    // When the binary check is active, input that is not detected as binary
    // (strict mode) is rejected right away.
    if (
        $check_if_string_is_binary
        &&
        !self::is_binary($str, true)
    ) {
        return false;
    }

    // Without mbstring only a warning is raised; the conversions below are
    // still attempted.
    if (self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // --- little-endian pass ---
    $maybe_utf32le = 0;
    $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32LE');
    if ($test) {
        // Round trip: UTF-8 -> UTF-32LE -> UTF-8 must reproduce $test.
        $test2 = \mb_convert_encoding($test, 'UTF-32LE', 'UTF-8');
        $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32LE');
        if ($test3 === $test) {
            /**
             * @psalm-suppress RedundantCondition
             */
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }
            // Score: number of keys of self::count_chars($test3) that are
            // found (strict comparison) among the values of $str_chars.
            // NOTE(review): presumably the keys are the distinct characters
            // of the converted text - confirm against count_chars().
            foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$maybe_utf32le;
                }
            }
            // drop the reference left behind by the by-reference loop
            unset($test3charEmpty);
        }
    }

    // --- big-endian pass (same procedure with UTF-32BE) ---
    $maybe_utf32be = 0;
    $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32BE');
    if ($test) {
        $test2 = \mb_convert_encoding($test, 'UTF-32BE', 'UTF-8');
        $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32BE');
        if ($test3 === $test) {
            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }
            foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$maybe_utf32be;
                }
            }
            // drop the reference left behind by the by-reference loop
            unset($test3charEmpty);
        }
    }

    // Different scores: 1 when little-endian scored higher, 2 otherwise.
    if ($maybe_utf32be !== $maybe_utf32le) {
        if ($maybe_utf32le > $maybe_utf32be) {
            return 1;
        }

        return 2;
    }

    // Equal scores (including both zero): not recognised as UTF-32.
    return false;
}
|
4042
|
|
|
|
|
4043
|
|
|
/** |
|
4044
|
|
|
* Checks whether the passed input contains only byte sequences that appear valid UTF-8. |
|
4045
|
|
|
* |
|
4046
|
|
|
* EXAMPLE: <code> |
|
4047
|
|
|
* UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true |
|
4048
|
|
|
* // |
|
4049
|
|
|
* UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false |
|
4050
|
|
|
* </code> |
|
4051
|
|
|
* |
|
4052
|
|
|
* @param int|string|string[]|null $str <p>The input to be checked.</p> |
|
4053
|
|
|
* @param bool $strict <p>Check also if the string is not UTF-16 or UTF-32.</p> |
|
4054
|
|
|
* |
|
4055
|
|
|
* @psalm-pure |
|
4056
|
|
|
* |
|
4057
|
|
|
* @return bool |
|
4058
|
|
|
*/ |
|
4059
|
83 |
|
public static function is_utf8($str, bool $strict = false): bool
{
    if (\is_array($str)) {
        // Every element must pass the check. The elements are only read, so
        // they are iterated by value - no reference is taken and nothing
        // needs to be unset afterwards.
        foreach ($str as $item) {
            if (!self::is_utf8($item, $strict)) {
                return false;
            }
        }

        // An empty array, or one whose elements all passed.
        return true;
    }

    // Scalars and null are cast to string before the actual check.
    return self::is_utf8_string((string) $str, $strict);
}
|
4073
|
|
|
|
|
4074
|
|
|
/** |
|
4075
|
|
|
* (PHP 5 >= 5.2.0, PECL json >= 1.2.0)<br/> |
|
4076
|
|
|
* Decodes a JSON string |
|
4077
|
|
|
* |
|
4078
|
|
|
* EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code> |
|
4079
|
|
|
* |
|
4080
|
|
|
* @see http://php.net/manual/en/function.json-decode.php |
|
4081
|
|
|
* |
|
4082
|
|
|
* @param string $json <p> |
|
4083
|
|
|
* The <i>json</i> string being decoded. |
|
4084
|
|
|
* </p> |
|
4085
|
|
|
* <p> |
|
4086
|
|
|
* This function only works with UTF-8 encoded strings. |
|
4087
|
|
|
* </p> |
|
4088
|
|
|
* <p>PHP implements a superset of |
|
4089
|
|
|
* JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard |
|
4090
|
|
|
* only supports these values when they are nested inside an array or an object. |
|
4091
|
|
|
* </p> |
|
4092
|
|
|
* @param bool $assoc [optional] <p> |
|
4093
|
|
|
* When <b>TRUE</b>, returned objects will be converted into |
|
4094
|
|
|
* associative arrays. |
|
4095
|
|
|
* </p> |
|
4096
|
|
|
* @param int $depth [optional] <p> |
|
4097
|
|
|
* User specified recursion depth. |
|
4098
|
|
|
* </p> |
|
4099
|
|
|
* @param int $options [optional] <p> |
|
4100
|
|
|
* Bitmask of JSON decode options. Currently only |
|
4101
|
|
|
* <b>JSON_BIGINT_AS_STRING</b> |
|
4102
|
|
|
* is supported (default is to cast large integers as floats) |
|
4103
|
|
|
* </p> |
|
4104
|
|
|
* |
|
4105
|
|
|
* @psalm-pure |
|
4106
|
|
|
* |
|
4107
|
|
|
* @return mixed |
|
4108
|
|
|
* <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and |
|
4109
|
|
|
* null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively. |
|
4110
|
|
|
* <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data |
|
4111
|
|
|
* is deeper than the recursion limit.</p> |
|
4112
|
|
|
*/ |
|
4113
|
43 |
|
public static function json_decode(
    string $json,
    bool $assoc = false,
    int $depth = 512,
    int $options = 0
) {
    // The input is run through self::filter() before anything else happens.
    $filtered_json = self::filter($json);

    if (self::$SUPPORT['json'] === false) {
        throw new \RuntimeException('ext-json: is not installed');
    }

    // The nesting depth handed to the native decoder is never below 1.
    $effective_depth = \max(1, $depth);

    return \json_decode($filtered_json, $assoc, $effective_depth, $options);
}
|
4131
|
|
|
|
|
4132
|
|
|
/** |
|
4133
|
|
|
* (PHP 5 >= 5.2.0, PECL json >= 1.2.0)<br/> |
|
4134
|
|
|
* Returns the JSON representation of a value. |
|
4135
|
|
|
* |
|
4136
|
|
|
* EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code> |
|
4137
|
|
|
* |
|
4138
|
|
|
* @see http://php.net/manual/en/function.json-encode.php |
|
4139
|
|
|
* |
|
4140
|
|
|
* @param mixed $value <p> |
|
4141
|
|
|
* The <i>value</i> being encoded. Can be any type except |
|
4142
|
|
|
* a resource. |
|
4143
|
|
|
* </p> |
|
4144
|
|
|
* <p> |
|
4145
|
|
|
* All string data must be UTF-8 encoded. |
|
4146
|
|
|
* </p> |
|
4147
|
|
|
* <p>PHP implements a superset of |
|
4148
|
|
|
* JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard |
|
4149
|
|
|
* only supports these values when they are nested inside an array or an object. |
|
4150
|
|
|
* </p> |
|
4151
|
|
|
* @param int $options [optional] <p> |
|
4152
|
|
|
* Bitmask consisting of <b>JSON_HEX_QUOT</b>, |
|
4153
|
|
|
* <b>JSON_HEX_TAG</b>, |
|
4154
|
|
|
* <b>JSON_HEX_AMP</b>, |
|
4155
|
|
|
* <b>JSON_HEX_APOS</b>, |
|
4156
|
|
|
* <b>JSON_NUMERIC_CHECK</b>, |
|
4157
|
|
|
* <b>JSON_PRETTY_PRINT</b>, |
|
4158
|
|
|
* <b>JSON_UNESCAPED_SLASHES</b>, |
|
4159
|
|
|
* <b>JSON_FORCE_OBJECT</b>, |
|
4160
|
|
|
* <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these |
|
4161
|
|
|
* constants is described on |
|
4162
|
|
|
* the JSON constants page. |
|
4163
|
|
|
* </p> |
|
4164
|
|
|
* @param int $depth [optional] <p> |
|
4165
|
|
|
* Set the maximum depth. Must be greater than zero. |
|
4166
|
|
|
* </p> |
|
4167
|
|
|
* |
|
4168
|
|
|
* @psalm-pure |
|
4169
|
|
|
* |
|
4170
|
|
|
* @return false|string |
|
4171
|
|
|
* A JSON encoded <strong>string</strong> on success or<br> |
|
4172
|
|
|
* <strong>FALSE</strong> on failure |
|
4173
|
|
|
*/ |
|
4174
|
5 |
|
public static function json_encode($value, int $options = 0, int $depth = 512) |
|
4175
|
|
|
{ |
|
4176
|
5 |
|
$value = self::filter($value); |
|
4177
|
|
|
|
|
4178
|
5 |
|
if (self::$SUPPORT['json'] === false) { |
|
4179
|
|
|
throw new \RuntimeException('ext-json: is not installed'); |
|
4180
|
|
|
} |
|
4181
|
|
|
|
|
4182
|
5 |
|
if ($depth < 1) { |
|
4183
|
|
|
$depth = 1; |
|
4184
|
|
|
} |
|
4185
|
|
|
|
|
4186
|
5 |
|
return \json_encode($value, $options, $depth); |
|
4187
|
|
|
} |
|
4188
|
|
|
|
|
4189
|
|
|
/**
 * Reports whether the JSON functions can be used in this PHP runtime.
 *
 * The check probes for the global "json_decode()" function.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
 *
 * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
 */
public static function json_loaded(): bool
{
    $has_json_decode = \function_exists('json_decode');

    return $has_json_decode;
}
|
4203
|
|
|
|
|
4204
|
|
|
/**
 * Lowercases the first character of a string and leaves the rest untouched.
 *
 * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
 *
 * @param string      $str                           <p>The input string</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string the resulting string
 */
public static function lcfirst(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // Any encoding other than the literal 'UTF-8' goes through the
    // encoding-aware helpers of this class.
    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $tail = (string) self::substr($str, 1, null, $encoding);
        $head = self::strtolower(
            (string) self::substr($str, 0, 1, $encoding),
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $head . $tail;
    }

    $tail = (string) \mb_substr($str, 1);
    $first_char = (string) \mb_substr($str, 0, 1);

    // Without language rules or length preservation the plain mbstring
    // lowercasing is used.
    if ($lang === null && !$try_to_keep_the_string_length) {
        return \mb_strtolower($first_char) . $tail;
    }

    $head = self::strtolower(
        $first_char,
        $encoding,
        false,
        $lang,
        $try_to_keep_the_string_length
    );

    return $head . $tail;
}
|
4266
|
|
|
|
|
4267
|
|
|
/**
 * Lowercases the first character of every word in the string.
 *
 * The input is split via UTF8::str_to_words(); each non-empty piece that is
 * not listed in $exceptions is passed through UTF8::lcfirst(), and the
 * pieces are concatenated again in their original order.
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
 * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do
 *                                                   not start a new word.</p>
 * @param string      $encoding                      [optional] <p>Set the charset.</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function lcwords(
    string $str,
    array $exceptions = [],
    string $char_list = '',
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    // Compare against '' explicitly: a truthiness check would also reject
    // the perfectly valid input "0".
    if ($str === '') {
        return '';
    }

    $words = self::str_to_words($str, $char_list);
    $use_exceptions = $exceptions !== [];

    $words_str = '';
    foreach ($words as $word) {
        // Skip only really empty pieces; a word "0" must survive.
        if ($word === '') {
            continue;
        }

        if ($use_exceptions && \in_array($word, $exceptions, true)) {
            // Excluded words are copied verbatim.
            $words_str .= $word;

            continue;
        }

        $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }

    return $words_str;
}
|
4320
|
|
|
|
|
4321
|
|
|
/**
 * Strip whitespace or other characters from the beginning of a UTF-8 string.
 *
 * EXAMPLE: <code>UTF8::ltrim('   中文空白  '); // '中文空白  '</code>
 *
 * @param string      $str   <p>The string to be trimmed</p>
 * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace,
 *                           an empty string strips nothing.</p>
 *
 * @psalm-pure
 *
 * @return string the string with unwanted characters stripped from the left
 */
public static function ltrim(string $str = '', string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    // An empty char list would build the invalid character class "[]",
    // the regex call would fail and the whole input would be lost.
    // Like PHP's native ltrim(), there is nothing to strip in that case.
    if ($chars === '') {
        return $str;
    }

    if (self::$SUPPORT['mbstring'] === true) {
        if ($chars !== null) {
            /** @noinspection PregQuoteUsageInspection */
            $chars = \preg_quote($chars);
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    if ($chars !== null) {
        // regex_replace() wraps the pattern in "/" delimiters, so quote those too.
        $chars = \preg_quote($chars, '/');
        $pattern = "^[{$chars}]+";
    } else {
        $pattern = '^[\\s]+';
    }

    return self::regex_replace($str, $pattern, '');
}
|
4360
|
|
|
|
|
4361
|
|
|
/**
 * Returns the UTF-8 character with the maximum code point in the given data.
 *
 * An array argument is joined into one string before it is inspected.
 *
 * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
 *
 * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
 *
 * @psalm-pure
 *
 * @return string|null the character with the highest code point, or null when no code point was found
 */
public static function max($arg)
{
    $str = \is_array($arg) ? \implode('', $arg) : $arg;

    $code_point_list = self::codepoints($str);
    if ($code_point_list === []) {
        return null;
    }

    return self::chr((int) \max($code_point_list));
}
|
4387
|
|
|
|
|
4388
|
|
|
/**
 * Calculates the largest byte length of any single character
 * in the given UTF-8 string.
 *
 * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
 *
 * @param string $str <p>The original Unicode string.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <p>Max byte length of the given chars, 0 if the size list is empty.</p>
 */
public static function max_chr_width(string $str): int
{
    $byte_lengths = self::chr_size_list($str);

    return $byte_lengths === [] ? 0 : (int) \max($byte_lengths);
}
|
4410
|
|
|
|
|
4411
|
|
|
/**
 * Reports whether the "mbstring" extension is loaded in this PHP runtime.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
 *
 * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
 */
public static function mbstring_loaded(): bool
{
    $is_loaded = \extension_loaded('mbstring');

    return $is_loaded;
}
|
4425
|
|
|
|
|
4426
|
|
|
/**
 * Returns the UTF-8 character with the minimum code point in the given data.
 *
 * An array argument is joined into one string before it is inspected.
 *
 * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
 *
 * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
 *
 * @psalm-pure
 *
 * @return string|null
 *                     <p>The character with the lowest code point, or null when no code point was found.</p>
 */
public static function min($arg)
{
    $str = \is_array($arg) ? \implode('', $arg) : $arg;

    $code_point_list = self::codepoints($str);
    if ($code_point_list === []) {
        return null;
    }

    return self::chr((int) \min($code_point_list));
}
|
4453
|
|
|
|
|
4454
|
|
|
/**
 * Normalize the encoding-"name" input.
 *
 * Resolution order: empty input -> $fallback; a handful of hard-coded names;
 * the per-process result cache; the list of known encodings; finally an
 * alias table that is looked up with the upper-cased name stripped of all
 * non-alphanumeric characters. Names that match nothing are returned upper-cased.
 *
 * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
 *
 * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
 * @param mixed $fallback <p>e.g.: UTF-8</p>
 *
 * @psalm-pure
 *
 * @return mixed|string
 *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
 *
 * @template TNormalizeEncodingFallback
 * @phpstan-param string|TNormalizeEncodingFallback $fallback
 * @phpstan-return string|TNormalizeEncodingFallback
 */
public static function normalize_encoding($encoding, $fallback = '')
{
    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<string,string>
     */
    static $RESOLVED_NAMES = [];

    $name = (string) $encoding;

    // '' and '0' are both "no encoding given".
    if (!$name) {
        return $fallback;
    }

    // Fast paths for the most common names.
    if (\in_array($name, ['UTF-8', 'UTF8'], true)) {
        return 'UTF-8';
    }

    if (\in_array($name, ['8BIT', 'BINARY'], true)) {
        return 'CP850';
    }

    if (\in_array($name, ['HTML', 'HTML-ENTITIES'], true)) {
        return 'HTML-ENTITIES';
    }

    if (\in_array($name, ['ISO', 'ISO-8859-1'], true)) {
        return 'ISO-8859-1';
    }

    // only a fallback, for non "strict_types" usage ...
    if (\in_array($name, ['1', '0'], true)) {
        return $fallback;
    }

    if (isset($RESOLVED_NAMES[$name])) {
        return $RESOLVED_NAMES[$name];
    }

    if (self::$ENCODINGS === null) {
        self::$ENCODINGS = self::getData('encodings');
    }

    // Already a known encoding name: remember and return it unchanged.
    if (\in_array($name, self::$ENCODINGS, true)) {
        $RESOLVED_NAMES[$name] = $name;

        return $name;
    }

    $upper_name = \strtoupper($name);
    $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upper_name);

    $equivalences = [
        'ISO8859'   => 'ISO-8859-1',
        'ISO88591'  => 'ISO-8859-1',
        'ISO'       => 'ISO-8859-1',
        'LATIN'     => 'ISO-8859-1',
        'LATIN1'    => 'ISO-8859-1', // Western European
        'ISO88592'  => 'ISO-8859-2',
        'LATIN2'    => 'ISO-8859-2', // Central European
        'ISO88593'  => 'ISO-8859-3',
        'LATIN3'    => 'ISO-8859-3', // Southern European
        'ISO88594'  => 'ISO-8859-4',
        'LATIN4'    => 'ISO-8859-4', // Northern European
        'ISO88595'  => 'ISO-8859-5', // Cyrillic
        'ISO88596'  => 'ISO-8859-6', // Arabic
        'ISO88597'  => 'ISO-8859-7', // Greek
        'ISO88598'  => 'ISO-8859-8', // Hebrew
        'ISO88599'  => 'ISO-8859-9',
        'LATIN5'    => 'ISO-8859-9', // Turkish
        'ISO885911' => 'ISO-8859-11',
        'TIS620'    => 'ISO-8859-11', // Thai
        'ISO885910' => 'ISO-8859-10',
        'LATIN6'    => 'ISO-8859-10', // Nordic
        'ISO885913' => 'ISO-8859-13',
        'LATIN7'    => 'ISO-8859-13', // Baltic
        'ISO885914' => 'ISO-8859-14',
        'LATIN8'    => 'ISO-8859-14', // Celtic
        'ISO885915' => 'ISO-8859-15',
        'LATIN9'    => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916' => 'ISO-8859-16',
        'LATIN10'   => 'ISO-8859-16', // Southeast European
        'UTF16'     => 'UTF-16',
        'UTF32'     => 'UTF-32',
        'UTF8'      => 'UTF-8',
        'UTF'       => 'UTF-8',
        'UTF7'      => 'UTF-7',
        '8BIT'      => 'CP850',
        'BINARY'    => 'CP850',
    ];

    // CP125x / WIN125x / WINDOWS125x => WINDOWS-125x (1250 up to 1258).
    for ($code_page = 1250; $code_page <= 1258; ++$code_page) {
        $canonical = 'WINDOWS-' . $code_page;

        $equivalences['CP' . $code_page] = $canonical;
        $equivalences['WIN' . $code_page] = $canonical;
        $equivalences['WINDOWS' . $code_page] = $canonical;
    }

    // Unknown aliases fall through as the upper-cased input.
    $resolved = $equivalences[$alias_key] ?? $upper_name;

    $RESOLVED_NAMES[$name] = $resolved;

    return $resolved;
}
|
4619
|
|
|
|
|
4620
|
|
|
/**
 * Standardize line endings ("\r\n", "\r" and "\n") to a single replacer.
 *
 * The replacement is done in ONE pass over the input, so a replacer that
 * itself contains "\r" or "\n" (e.g. "\r\n") is never replaced again.
 *
 * EXAMPLE: <code>UTF8::normalize_line_ending("a\r\nb\rc", "\r\n"); // "a\r\nb\r\nc"</code>
 *
 * @param string          $str      <p>The input string.</p>
 * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
 *                                  here. If an array is given, its values are used (in order) for
 *                                  "\r\n", "\r" and "\n"; missing values are treated as an empty string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with normalized line ending.</p>
 */
public static function normalize_line_ending(string $str, $replacer = "\n"): string
{
    if ($str === '') {
        return '';
    }

    if (\is_array($replacer)) {
        // Same pairing rule as str_replace(): n-th search <=> n-th replacement,
        // an empty string when the replacement list is too short.
        $replacements = \array_values($replacer);
        $map = [
            "\r\n" => (string) ($replacements[0] ?? ''),
            "\r"   => (string) ($replacements[1] ?? ''),
            "\n"   => (string) ($replacements[2] ?? ''),
        ];
    } else {
        $replacer = (string) $replacer;
        $map = [
            "\r\n" => $replacer,
            "\r"   => $replacer,
            "\n"   => $replacer,
        ];
    }

    // strtr() tries the longest key first and never rescans text it has
    // already replaced.
    return \strtr($str, $map);
}
|
4636
|
|
|
|
|
4637
|
|
|
/**
 * Normalize some MS Word special characters.
 *
 * Thin wrapper: the work is delegated to ASCII::normalize_msword().
 *
 * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
 *
 * @param string $str <p>The string to be normalized.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
 */
public static function normalize_msword(string $str): string
{
    $normalized = ASCII::normalize_msword($str);

    return $normalized;
}
|
4653
|
|
|
|
|
4654
|
|
|
/**
 * Normalize the whitespace.
 *
 * Thin wrapper: all arguments are forwarded unchanged to ASCII::normalize_whitespace().
 *
 * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
 *
 * @param string $str                          <p>The string to be normalized.</p>
 * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
 * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
 *                                             bidirectional text chars.</p>
 * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with normalized whitespace.</p>
 */
public static function normalize_whitespace(
    string $str,
    bool $keep_non_breaking_space = false,
    bool $keep_bidi_unicode_controls = false,
    bool $normalize_control_characters = false
): string {
    $normalized = ASCII::normalize_whitespace(
        $str,
        $keep_non_breaking_space,
        $keep_bidi_unicode_controls,
        $normalize_control_characters
    );

    return $normalized;
}
|
4683
|
|
|
|
|
4684
|
|
|
/**
 * Calculates Unicode code point of the given UTF-8 encoded character.
 *
 * INFO: opposite to UTF8::chr()
 *
 * Lookup order: per-process result cache, the static "ord" data table,
 * "IntlChar::ord()" (when the intlChar support flag is set) and finally
 * a manual decode of the first (up to four) bytes.
 *
 * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
 *
 * @param string $chr      <p>The character of which to calculate code point.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <p>Unicode code point of the given character,<br>
 *             0 on invalid UTF-8 byte sequence</p>
 */
public static function ord($chr, string $encoding = 'UTF-8'): int
{
    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<string,int>
     */
    static $ORD_CACHE = [];

    $char = (string) $chr;

    // 'UTF-8' and 'CP850' are used as given, everything else is normalized first.
    $is_fixed_name = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$is_fixed_name) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $key = $char . '_' . $encoding;
    if (isset($ORD_CACHE[$key])) {
        return $ORD_CACHE[$key];
    }

    // check again, if it's still not UTF-8
    if ($encoding !== 'UTF-8') {
        $char = self::encode($encoding, $char);
    }

    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (isset(self::$ORD[$char])) {
        $result = self::$ORD[$char];
        $ORD_CACHE[$key] = $result;

        return $result;
    }

    //
    // fallback via "IntlChar"
    //

    if (self::$SUPPORT['intlChar'] === true) {
        $intl_code = \IntlChar::ord($char);
        if ($intl_code) {
            $ORD_CACHE[$key] = $intl_code;

            return $intl_code;
        }
    }

    //
    // fallback via vanilla php
    //

    /** @var int[] $bytes - "unpack": only false if the format string contains errors */
    $bytes = \unpack('C*', (string) \substr($char, 0, 4));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
        // four-byte sequence
        $result = ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
        // three-byte sequence
        $result = ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
        // two-byte sequence
        $result = ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
    } else {
        // single byte (or nothing to decode)
        $result = $lead;
    }

    $ORD_CACHE[$key] = $result;

    return $result;
}
|
4769
|
|
|
|
|
4770
|
|
|
/**
 * Parses the string into an array (into the second parameter).
 *
 * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
 *          if the second parameter is not set!
 *
 * EXAMPLE: <code>
 * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
 * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
 * </code>
 *
 * @see http://php.net/manual/en/function.parse-str.php
 *
 * @param string $str        <p>The input string.</p>
 * @param array  $result     <p>The result will be returned into this reference parameter.</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
 */
public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
{
    if ($clean_utf8) {
        $str = self::clean($str);
    }

    if (self::$SUPPORT['mbstring'] !== true) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
         */
        \parse_str($str, $result);

        return $result !== [];
    }

    $parsed = \mb_parse_str($str, $result);

    // Success needs both: no failure signal and a non-empty result.
    return !($parsed === false || $result === []);
}
|
4811
|
|
|
|
|
4812
|
|
|
/**
 * Checks if the "u" pattern modifier, which enables Unicode support in PCRE, is available.
 *
 * The check runs an empty pattern with the "u" modifier against an empty
 * subject; warnings of that probe are silenced.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>
 *              <strong>true</strong> if support is available,<br>
 *              <strong>false</strong> otherwise
 *              </p>
 */
public static function pcre_utf8_support(): bool
{
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @\preg_match('//u', '');

    return (bool) $probe;
}
|
4828
|
|
|
|
|
4829
|
|
|
/**
 * Create an array containing a range of UTF-8 characters.
 *
 * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
 *
 * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
 * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
 * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
 *                              "is_numeric"</p>
 * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param float|int  $step      [optional] <p>
 *                              If a step value is given, it will be used as the
 *                              increment between elements in the sequence. step
 *                              should be given as a positive number. If not specified,
 *                              step will default to 1.
 *                              </p>
 *
 * @psalm-pure
 *
 * @throws \InvalidArgumentException if $step is not numeric or not greater than zero
 * @throws \RuntimeException         if $use_ctype is requested but the ctype support flag is false
 *
 * @return string[] an empty array if one of the boundaries is empty or resolves to 0
 */
public static function range(
    $var1,
    $var2,
    bool $use_ctype = true,
    string $encoding = 'UTF-8',
    $step = 1
): array {
    if (!$var1 || !$var2) {
        return [];
    }

    if ($step !== 1) {
        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_numeric($step)) {
            throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
         */
        if ($step <= 0) {
            throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
        }
    }

    if ($use_ctype && self::$SUPPORT['ctype'] === false) {
        throw new \RuntimeException('ext-ctype: is not installed');
    }

    // ctype functions treat integers from -128 to 255 as ASCII codes (and
    // passing non-strings is deprecated since PHP 8.1), so always test the
    // string form of both boundaries.
    $var1_str = (string) $var1;
    $var2_str = (string) $var2;

    $is_digit = $use_ctype && \ctype_digit($var1_str) && \ctype_digit($var2_str);
    $is_xdigit = !$is_digit && $use_ctype && \ctype_xdigit($var1_str) && \ctype_xdigit($var2_str);

    if ($is_digit) {
        $start = (int) $var1;
    } elseif ($is_xdigit) {
        $start = (int) self::hex_to_int($var1_str);
    } elseif (!$use_ctype && \is_numeric($var1)) {
        $start = (int) $var1;
    } else {
        // Not a number: take the boundary as a character.
        $start = self::ord($var1_str);
    }

    if (!$start) {
        return [];
    }

    if ($is_digit) {
        $end = (int) $var2;
    } elseif ($is_xdigit) {
        $end = (int) self::hex_to_int($var2_str);
    } elseif (!$use_ctype && \is_numeric($var2)) {
        $end = (int) $var2;
    } else {
        $end = self::ord($var2_str);
    }

    if (!$end) {
        return [];
    }

    $array = [];
    foreach (\range($start, $end, $step) as $i) {
        $array[] = (string) self::chr((int) $i, $encoding);
    }

    return $array;
}
|
4922
|
|
|
|
|
4923
|
|
|
/**
 * Multi decode HTML entity + fix urlencoded-win1252-chars.
 *
 * Each decode round converts the string via UTF8::to_utf8(), decodes HTML
 * entities and then applies PHP's "rawurldecode()". With $multi_decode the
 * round is repeated until the string no longer changes.
 *
 * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
 *
 * e.g:
 * 'test+test'                     => 'test+test'
 * 'D&#252;sseldorf'               => 'Düsseldorf'
 * 'D%FCsseldorf'                  => 'Düsseldorf'
 * 'D&#xFC;sseldorf'               => 'Düsseldorf'
 * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
 * 'DÃ¼sseldorf'                   => 'Düsseldorf'
 * 'D%C3%BCsseldorf'               => 'Düsseldorf'
 * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
 * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
 *
 * @param string $str          <p>The input string.</p>
 * @param bool   $multi_decode <p>Decode as often as possible.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The decoded URL, as a string.</p>
 */
public static function rawurldecode(string $str, bool $multi_decode = true): string
{
    if ($str === '') {
        return '';
    }

    $str = self::urldecode_unicode_helper($str);

    // One round always runs; further rounds only with $multi_decode and
    // only while the previous round still changed something.
    do {
        $before_round = $str;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $str = \rawurldecode(
            self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            )
        );
    } while ($multi_decode && $before_round !== $str);

    return self::fix_simple_utf8($str);
}
|
4983
|
|
|
|
|
4984
|
|
|
/**
 * Replaces all occurrences of $pattern in $str by $replacement.
 *
 * The pattern is wrapped in $delimiter and always executed with the "u"
 * modifier, followed by the modifiers given in $options.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
 * @param string $replacement <p>The string to replace with.</p>
 * @param string $options     [optional] <p>Matching conditions to be used; "msr" is treated as "ms".</p>
 * @param string $delimiter   [optional] <p>Delimiter of the regex. Default: '/'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function regex_replace(
    string $str,
    string $pattern,
    string $replacement,
    string $options = '',
    string $delimiter = '/'
): string {
    $modifiers = $options === 'msr' ? 'ms' : $options;

    // fallback
    $wrap = $delimiter ?: '/';

    $regex = $wrap . $pattern . $wrap . 'u' . $modifiers;

    return (string) \preg_replace($regex, $replacement, $str);
}
|
5019
|
|
|
|
|
5020
|
|
|
/**
 * Strip byte-order-marks (UTF-8 / UTF-16 / UTF-32) from the beginning of a string.
 *
 * Every entry of the internal BOM table is checked in turn, and each one that
 * matches the current start of the string is cut off.
 *
 * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string without a leading UTF-BOM.</p>
 */
public static function remove_bom(string $str): string
{
    if ($str === '') {
        return '';
    }

    $remaining_bytes = \strlen($str);
    foreach (self::$BOM as $bom => $bom_size) {
        if (\strncmp($str, $bom, $bom_size) !== 0) {
            continue;
        }

        /** @var false|string $stripped - needed for PhpStan (stubs error) */
        $stripped = \substr($str, $bom_size, $remaining_bytes);
        if ($stripped === false) {
            return '';
        }

        $remaining_bytes -= $bom_size;

        $str = (string) $stripped;
    }

    return $str;
}
|
5055
|
|
|
|
|
5056
|
|
|
/**
 * Collapse directly repeated occurrences of a string inside another string into one occurrence.
 *
 * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
 *
 * @param string          $str  <p>The base string.</p>
 * @param string|string[] $what <p>String (or list of strings) to search for in the base string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with removed duplicates.</p>
 */
public static function remove_duplicates(string $str, $what = ' '): string
{
    if (\is_string($what)) {
        $what = [$what];
    }

    /**
     * @psalm-suppress RedundantConditionGivenDocblockType
     */
    if (\is_array($what)) {
        foreach ($what as $item) {
            // The replacement is the captured group, not $item itself: used as a
            // replacement string, "$0", "\1" or "\\" inside $item would be expanded
            // by "preg_replace()" instead of being inserted literally.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }
    }

    return $str;
}
|
5086
|
|
|
|
|
5087
|
|
|
/**
 * Strip HTML tags from the string via "strip_tags()".
 *
 * @param string $str            <p>The input string.</p>
 * @param string $allowable_tags [optional] <p>Tags which should not be stripped, passed through
 *                               to "strip_tags()". Default: ''</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string without html tags.</p>
 */
public static function remove_html(string $str, string $allowable_tags = ''): string
{
    $plain_text = \strip_tags($str, $allowable_tags);

    return $plain_text;
}
|
5104
|
|
|
|
|
5105
|
|
|
/**
 * Remove all breaks [<br> | \r\n | \r | \n] from the string.
 *
 * A "\r\n" pair counts as one break, so it is replaced once. "<br>" tags are
 * matched case-insensitively, with or without attributes or a closing slash;
 * other tags that only start with "br" are left untouched.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $replacement [optional] <p>Default is an empty string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string without breaks.</p>
 */
public static function remove_html_breaks(string $str, string $replacement = ''): string
{
    // "\r\n" must be listed before the single-character alternatives, so that
    // a Windows line ending is consumed as one unit.
    return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
}
|
5120
|
|
|
|
|
5121
|
|
|
/**
 * Remove invisible characters from a string.
 *
 * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
 *
 * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
 *
 * This is a thin wrapper: all arguments are handed unchanged to
 * "ASCII::remove_invisible_characters()".
 *
 * @param string $str                           <p>The input string.</p>
 * @param bool   $url_encoded                   [optional] <p>
 *                                              Try to remove url encoded control character.
 *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
 *                                              <br>
 *                                              Default: false
 *                                              </p>
 * @param string $replacement                   [optional] <p>The replacement character.</p>
 * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string without invisible chars.</p>
 */
public static function remove_invisible_characters(
    string $str,
    bool $url_encoded = false,
    string $replacement = '',
    bool $keep_basic_control_characters = true
): string {
    $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement, $keep_basic_control_characters);

    return $cleaned;
}
|
5158
|
|
|
|
|
5159
|
|
|
/**
 * Returns a new string with the prefix $substring removed, if present.
 *
 * The prefix test is a byte-wise comparison; the cut itself is done in
 * characters of the given encoding.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The prefix to remove; an empty string removes nothing.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string without the prefix $substring.</p>
 */
public static function remove_left(
    string $str,
    string $substring,
    string $encoding = 'UTF-8'
): string {
    // Compare with '' explicitly: a truthiness check would treat the prefix "0" as empty.
    // "strncmp()" only looks at the first bytes instead of searching the whole string.
    if (
        $substring === ''
        ||
        \strncmp($str, $substring, \strlen($substring)) !== 0
    ) {
        return $str;
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            (int) \mb_strlen($substring)
        );
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    return (string) self::substr(
        $str,
        (int) self::strlen($substring, $encoding),
        null,
        $encoding
    );
}
|
5200
|
|
|
|
|
5201
|
|
|
/**
 * Returns a new string with the suffix $substring removed, if present.
 *
 * The suffix test is a byte-wise comparison; the cut itself is done in
 * characters of the given encoding.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The suffix to remove; an empty string removes nothing.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string having a $str without the suffix $substring.</p>
 */
public static function remove_right(
    string $str,
    string $substring,
    string $encoding = 'UTF-8'
): string {
    // Compare with '' explicitly: a truthiness check would treat the suffix "0" as empty.
    if (
        $substring === ''
        ||
        \substr($str, -\strlen($substring)) !== $substring
    ) {
        return $str;
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            0,
            (int) \mb_strlen($str) - (int) \mb_strlen($substring)
        );
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    return (string) self::substr(
        $str,
        0,
        (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
        $encoding
    );
}
|
5239
|
|
|
|
|
5240
|
|
|
/**
 * Replaces every occurrence of $search in $str by $replacement.
 *
 * Case-sensitive replacement uses "str_replace()", the case-insensitive
 * variant is handled by "UTF8::str_ireplace()".
 *
 * @param string $str            <p>The input string.</p>
 * @param string $search         <p>The needle to search for.</p>
 * @param string $replacement    <p>The string to replace with.</p>
 * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with replaced parts.</p>
 */
public static function replace(
    string $str,
    string $search,
    string $replacement,
    bool $case_sensitive = true
): string {
    return $case_sensitive
        ? \str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
}
|
5265
|
|
|
|
|
5266
|
|
|
/**
 * Replaces every occurrence of each $search element in $str by $replacement.
 *
 * Case-sensitive replacement uses "str_replace()", the case-insensitive
 * variant is handled by "UTF8::str_ireplace()".
 *
 * @param string       $str            <p>The input string.</p>
 * @param array        $search         <p>The elements to search for.</p>
 * @param array|string $replacement    <p>The string (or list of strings) to replace with.</p>
 * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with replaced parts.</p>
 */
public static function replace_all(
    string $str,
    array $search,
    $replacement,
    bool $case_sensitive = true
): string {
    if (!$case_sensitive) {
        return self::str_ireplace($search, $replacement, $str);
    }

    return \str_replace($search, $replacement, $str);
}
|
5291
|
|
|
|
|
5292
|
|
|
/**
 * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
 *
 * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
 *
 * @param string $str                        <p>The input string</p>
 * @param string $replacement_char           <p>The replacement character.</p>
 * @param bool   $process_invalid_utf8_chars <p>Also convert invalid UTF-8 byte sequences via
 *                                           "mb_convert_encoding()".</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string without diamond question marks (�).</p>
 */
public static function replace_diamond_question_mark(
    string $str,
    string $replacement_char = '',
    bool $process_invalid_utf8_chars = true
): string {
    if ($str === '') {
        return '';
    }

    if ($process_invalid_utf8_chars) {
        // "mb_substitute_character()" takes a Unicode code point (or the keyword "none"),
        // so the byte-wise "\ord()" would pick the wrong character for a multi-byte replacement.
        $replacement_char_helper = $replacement_char === ''
            ? 'none'
            : self::ord($replacement_char, 'UTF-8');

        if (self::$SUPPORT['mbstring'] === false) {
            // if there is no native support for "mbstring",
            // then we need to clean the string before ...
            $str = self::clean($str);
        }

        /**
         * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
         */
        $save = \mb_substitute_character();
        /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
        @\mb_substitute_character($replacement_char_helper);
        // the polyfill maybe return false, so cast to string
        $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
        // restore the previously configured substitute character
        \mb_substitute_character($save);
    }

    return \str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacement_char,
            $replacement_char,
        ],
        $str
    );
}
|
5351
|
|
|
|
|
5352
|
|
|
/**
 * Strip whitespace or other characters from the end of a UTF-8 string.
 *
 * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
 *
 * @param string      $str   <p>The string to be trimmed.</p>
 * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace (\s).</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with unwanted characters stripped from the right.</p>
 */
public static function rtrim(string $str = '', string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    $has_mbstring = self::$SUPPORT['mbstring'] === true;

    // The pattern is anchored with "\z" (absolute end of the string): "$" can also
    // match just before a final newline, which would strip characters that are not
    // actually at the end, e.g. the dashes in "a--\n".
    if ($chars === null) {
        $pattern = '[\\s]+\\z';
    } else {
        /** @noinspection PregQuoteUsageInspection */
        $quoted_chars = $has_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
        $pattern = "[{$quoted_chars}]+\\z";
    }

    if ($has_mbstring) {
        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    return self::regex_replace($str, $pattern, '');
}
|
5392
|
|
|
|
|
5393
|
|
|
/**
 * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
 *
 * Builds a "<pre>" block with one "key - value" line per entry of the internal
 * support table. The block is echoed when $useEcho is true and is returned
 * in every case.
 *
 * @param bool $useEcho [optional] <p>Echo the generated HTML. Default: true</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The generated HTML.</p>
 */
public static function showSupport(bool $useEcho = true)
{
    $html = '<pre>';
    // iterate by value: nothing is modified, and a by-reference loop variable
    // would stay bound to the last entry of the static table
    foreach (self::$SUPPORT as $key => $value) {
        $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
    }
    $html .= '</pre>';

    if ($useEcho) {
        echo $html;
    }

    return $html;
}
|
5419
|
|
|
|
|
5420
|
|
|
/**
 * Converts a UTF-8 character to a HTML Numbered Entity like "&#123;".
 *
 * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
 *
 * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
 * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
 * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The HTML numbered entity for the given character.</p>
 */
public static function single_chr_html_encode(
    string $char,
    bool $keep_ascii_chars = false,
    string $encoding = 'UTF-8'
): string {
    if ($char === '') {
        return '';
    }

    // ASCII input is passed through untouched, but only on request
    $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);
    if ($is_kept_ascii) {
        return $char;
    }

    $code_point = self::ord($char, $encoding);

    return '&#' . $code_point . ';';
}
|
5453
|
|
|
|
|
5454
|
|
|
/**
 * Replace every run of $tab_length consecutive spaces with one tab character.
 *
 * @param string $str        <p>The input string.</p>
 * @param int    $tab_length [optional] <p>Number of spaces that make up one tab; values below 1
 *                           leave the string unchanged. Default: 4</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with space runs converted to tabs.</p>
 */
public static function spaces_to_tabs(string $str, int $tab_length = 4): string
{
    // a run of zero or negative length cannot be built / searched for
    if ($tab_length < 1) {
        return $str;
    }

    // build the run from the requested width, so every width is handled the same way
    return \str_replace(\str_repeat(' ', $tab_length), "\t", $str);
}
|
5474
|
|
|
|
|
5475
|
|
|
/**
 * Returns a camelCase version of the string. Trims surrounding spaces,
 * capitalizes letters following digits, spaces, dashes and underscores,
 * and removes spaces, dashes, as well as underscores.
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_camelize(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($clean_utf8) {
        $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
    $str = (string) \preg_replace('/^[-_]+/', '', $str);

    $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

    /**
     * Upper-case helper shared by both replacement passes.
     *
     * @param string $text
     * @param bool   $clean
     *
     * @psalm-pure
     *
     * @return string
     */
    $to_upper = static function (string $text, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
        if (!$use_mb_functions) {
            return self::strtoupper($text, $encoding, $clean, $lang, $try_to_keep_the_string_length);
        }

        if ($encoding === 'UTF-8') {
            return \mb_strtoupper($text);
        }

        return \mb_strtoupper($text, $encoding);
    };

    // pass 1: drop separator runs and upper-case the character that follows them
    $str = (string) \preg_replace_callback(
        '/[-_\\s]+(.)?/u',
        /**
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $match) use ($to_upper): string {
            return isset($match[1]) ? $to_upper($match[1], false) : '';
        },
        $str
    );

    // pass 2: upper-case each digit run together with the character right after it
    return (string) \preg_replace_callback(
        '/[\\p{N}]+(.)?/u',
        /**
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $match) use ($to_upper, $clean_utf8): string {
            return $to_upper($match[0], $clean_utf8);
        },
        $str
    );
}
|
5568
|
|
|
|
|
5569
|
|
|
/**
 * Returns the string with the first letter of each word capitalized,
 * except for when the word is a name which shouldn't be capitalized.
 *
 * Whitespace is collapsed first; the capitalization helper is then applied
 * with ' ' and afterwards with '-' as delimiter.
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with $str capitalized.</p>
 */
public static function str_capitalize_name(string $str): string
{
    $collapsed = self::collapse_whitespace($str);
    $space_separated = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($space_separated, '-');
}
|
5590
|
|
|
|
|
5591
|
|
|
/**
 * Returns true if the string contains $needle, false otherwise. By default
 * the comparison is case-sensitive, but can be made insensitive by setting
 * $case_sensitive to false.
 *
 * @param string $haystack       <p>The input string.</p>
 * @param string $needle         <p>Substring to look for.</p>
 * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $haystack contains $needle.</p>
 */
public static function str_contains(
    string $haystack,
    string $needle,
    bool $case_sensitive = true
): bool {
    if (!$case_sensitive) {
        return \mb_stripos($haystack, $needle) !== false;
    }

    // the native function only exists since PHP 8
    if (\PHP_VERSION_ID >= 80000) {
        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }

    return \strpos($haystack, $needle) !== false;
}
|
5621
|
|
|
|
|
5622
|
|
|
/**
 * Returns true if the string contains all $needles, false otherwise. By
 * default the comparison is case-sensitive, but can be made insensitive by
 * setting $case_sensitive to false.
 *
 * An empty haystack, an empty needle list, or an empty / non-scalar needle
 * always yields false.
 *
 * @param string $haystack       <p>The input string.</p>
 * @param array  $needles        <p>SubStrings to look for.</p>
 * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $haystack contains all $needles.</p>
 */
public static function str_contains_all(
    string $haystack,
    array $needles,
    bool $case_sensitive = true
): bool {
    if ($haystack === '' || $needles === []) {
        return false;
    }

    foreach ($needles as $needle) {
        if (!\is_scalar($needle)) {
            return false;
        }

        // compare with '' explicitly: a truthiness check would reject the needle "0"
        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }

        // exactly one search per needle, chosen by the requested case handling
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        if ($position === false) {
            return false;
        }
    }

    return true;
}
|
5659
|
|
|
|
|
5660
|
|
|
/**
 * Returns true if the string contains any $needles, false otherwise. By
 * default the comparison is case-sensitive, but can be made insensitive by
 * setting $case_sensitive to false.
 *
 * Empty or non-scalar needles are skipped; an empty haystack or an empty
 * needle list yields false.
 *
 * @param string $haystack       <p>The input string.</p>
 * @param array  $needles        <p>SubStrings to look for.</p>
 * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $haystack contains at least one of the $needles.</p>
 */
public static function str_contains_any(
    string $haystack,
    array $needles,
    bool $case_sensitive = true
): bool {
    if ($haystack === '' || $needles === []) {
        return false;
    }

    foreach ($needles as $needle) {
        if (!\is_scalar($needle)) {
            continue;
        }

        // compare with '' explicitly: a truthiness check would skip the needle "0"
        $needle = (string) $needle;
        if ($needle === '') {
            continue;
        }

        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        if ($position !== false) {
            return true;
        }
    }

    return false;
}
|
5703
|
|
|
|
|
5704
|
|
|
/**
 * Returns a lowercase and trimmed string separated by dashes. Dashes are
 * inserted before uppercase characters (with the exception of the first
 * character of the string), and in place of spaces as well as underscores.
 *
 * Shortcut for "UTF8::str_delimit()" with '-' as delimiter.
 *
 * @param string $str      <p>The input string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
{
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
}
|
5720
|
|
|
|
|
5721
|
|
|
/**
 * Returns a lowercase and trimmed string separated by the given delimiter.
 * Delimiters are inserted before uppercase characters (with the exception
 * of the first character of the string), and in place of spaces, dashes,
 * and underscores. Alpha delimiters are not converted to lowercase.
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
 *                                                   ß</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_delimit(
    string $str,
    string $delimiter,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    $has_mbstring = self::$SUPPORT['mbstring'] === true;
    $trimmed = \trim($str);

    // step 1: put a dash in front of every uppercase letter that is not at a word boundary
    if ($has_mbstring) {
        $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
    } else {
        $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
    }

    // step 2: lowercase, via the native function whenever no special handling is requested
    $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
    if ($use_mb_functions && $encoding === 'UTF-8') {
        $str = \mb_strtolower($str);
    } else {
        $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }

    // step 3: every run of dashes, underscores and whitespace becomes one delimiter
    if ($has_mbstring) {
        return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
    }

    return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
}
|
5772
|
|
|
|
|
5773
|
|
|
/**
 * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
 *
 * The detection is a chain of checks, the first hit wins:
 * 1. binary input: UTF-32 (LE/BE), then UTF-16 (LE/BE), otherwise false
 * 2. pure ASCII
 * 3. valid UTF-8
 * 4. "mb_detect_encoding()" in strict mode with a fixed candidate list (only if mbstring is available)
 * 5. "iconv()" round-trip against every entry of the "encodings" data list
 *
 * EXAMPLE: <code>
 * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
 * UTF8::str_detect_encoding('Abc'); // 'ASCII'
 * </code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>
 *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
 *                      otherwise it will return false e.g. for BINARY or not detected encoding.
 *                      </p>
 */
public static function str_detect_encoding($str)
{
    // init
    $str = (string) $str;

    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    // the second argument of "is_binary()" is only true if the string has no BOM
    $without_bom = !self::string_has_bom($str);
    if (self::is_binary($str, $without_bom)) {
        $utf32_type = self::is_utf32($str, false);
        if ($utf32_type === 1) {
            return 'UTF-32LE';
        }
        if ($utf32_type === 2) {
            return 'UTF-32BE';
        }

        $utf16_type = self::is_utf16($str, false);
        if ($utf16_type === 1) {
            return 'UTF-16LE';
        }
        if ($utf16_type === 2) {
            return 'UTF-16BE';
        }

        // is binary but not "UTF-16" or "UTF-32"
        return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (ASCII::is_ascii($str)) {
        return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8_string($str)) {
        return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    if (self::$SUPPORT['mbstring'] === true) {
        // the order matters: the first candidate that matches in strict mode is returned
        $candidates = [
            'ISO-8859-1',
            'ISO-8859-2',
            'ISO-8859-3',
            'ISO-8859-4',
            'ISO-8859-5',
            'ISO-8859-6',
            'ISO-8859-7',
            'ISO-8859-8',
            'ISO-8859-9',
            'ISO-8859-10',
            'ISO-8859-13',
            'ISO-8859-14',
            'ISO-8859-15',
            'ISO-8859-16',
            'WINDOWS-1251',
            'WINDOWS-1252',
            'WINDOWS-1254',
            'CP932',
            'CP936',
            'CP950',
            'CP866',
            'CP850',
            'CP51932',
            'CP50220',
            'CP50221',
            'CP50222',
            'ISO-2022-JP',
            'ISO-2022-KR',
            'JIS',
            'JIS-ms',
            'EUC-CN',
            'EUC-JP',
        ];

        // info: do not use the symfony polyfill here
        $detected = \mb_detect_encoding($str, $candidates, true);
        if ($detected) {
            return $detected;
        }
    }

    //
    // 5.) check via "iconv()"
    //

    // lazy-load the list of known encodings
    if (self::$ENCODINGS === null) {
        self::$ENCODINGS = self::getData('encodings');
    }

    foreach (self::$ENCODINGS as $candidate) {
        // INFO: //IGNORE but still throw notice
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);

        // the string survived the conversion unchanged, so it is accepted as this encoding
        if ($round_trip === $str) {
            return $candidate;
        }
    }

    return false;
}
|
5903
|
|
|
|
|
5904
|
|
|
/**
 * Check if the string ends with the given substring (case-sensitive, byte-wise comparison).
 *
 * EXAMPLE: <code>
 * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
 * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Always true for an empty $needle, false for an empty $haystack with a non-empty $needle.</p>
 */
public static function str_ends_with(string $haystack, string $needle): bool
{
    // every string ends with the empty string
    if ($needle === '') {
        return true;
    }

    // nothing (except the empty string) can be found in an empty string
    if ($haystack === '') {
        return false;
    }

    if (\PHP_VERSION_ID < 80000) {
        // fallback for PHP < 8: compare the last "strlen($needle)" bytes
        return \substr($haystack, -\strlen($needle)) === $needle;
    }

    /** @phpstan-ignore-next-line - only for PHP8 */
    return \str_ends_with($haystack, $needle);
}
|
5936
|
|
|
|
|
5937
|
|
|
/**
 * Returns true if the string ends with any of $substrings, false otherwise.
 *
 * - case-sensitive
 * - an empty substring matches every string, exactly like "UTF8::str_ends_with()" does
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $substrings <p>Substrings to look for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str ends with at least one of the $substrings.</p>
 */
public static function str_ends_with_any(string $str, array $substrings): bool
{
    // iterate by value: the list is only read, never modified
    foreach ($substrings as $substring) {
        if (
            // every string ends with the empty string
            $substring === ''
            ||
            // byte-wise comparison of the last "strlen($substring)" bytes
            \substr($str, -\strlen($substring)) === $substring
        ) {
            return true;
        }
    }

    // also reached for an empty list of substrings
    return false;
}
|
5964
|
|
|
|
|
5965
|
|
|
/**
 * Ensures that the string begins with $substring. If it doesn't, it's
 * prepended.
 *
 * INFO: An empty $substring leaves the string unchanged.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The substring to add if not present.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_ensure_left(string $str, string $substring): string
{
    // byte-wise, case-sensitive prefix check
    $has_prefix = $substring !== ''
                  &&
                  \strncmp($str, $substring, \strlen($substring)) === 0;

    return $has_prefix ? $str : $substring . $str;
}
|
5988
|
|
|
|
|
5989
|
|
|
/**
 * Ensures that the string ends with $substring. If it doesn't, it's appended.
 *
 * INFO: An empty $substring leaves the string unchanged, an empty $str results in $substring.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The substring to add if not present.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_ensure_right(string $str, string $substring): string
{
    // byte-wise, case-sensitive suffix check (only done if both strings are non-empty)
    $has_suffix = $str !== ''
                  &&
                  $substring !== ''
                  &&
                  \substr($str, -\strlen($substring)) === $substring;

    return $has_suffix ? $str : $str . $substring;
}
|
6013
|
|
|
|
|
6014
|
|
|
/**
 * Capitalizes the first word of the string, replaces underscores with
 * spaces, and strips '_id'.
 *
 * INFO: Every occurrence of '_id' is removed (not only a trailing one), afterwards
 *       the remaining underscores become spaces and the result is trimmed
 *       before it is passed to "UTF8::ucfirst()".
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_humanize($str): string
{
    // order matters: '_id' must be removed before '_' is turned into a space
    $search = ['_id', '_'];
    $replace = ['', ' '];

    $humanized = \str_replace($search, $replace, $str);

    return self::ucfirst(\trim($humanized));
}
|
6040
|
|
|
|
|
6041
|
|
|
/**
 * Check if the string ends with the given substring, case-insensitive.
 *
 * EXAMPLE: <code>
 * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
 * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Always true for an empty $needle, false for an empty $haystack with a non-empty $needle.</p>
 */
public static function str_iends_with(string $haystack, string $needle): bool
{
    // every string ends with the empty string
    if ($needle === '') {
        return true;
    }

    if ($haystack === '') {
        return false;
    }

    // the tail has the same byte-length as the needle
    // NOTE(review): this presumably misses matches where upper- and lower-case differ in byte-length - confirm
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
}
|
6068
|
|
|
|
|
6069
|
|
|
/**
 * Returns true if the string ends with any of $substrings, false otherwise.
 *
 * - case-insensitive
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $substrings <p>Substrings to look for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str ends with at least one of the $substrings.</p>
 */
public static function str_iends_with_any(string $str, array $substrings): bool
{
    // an empty list never enters the loop and therefore results in false
    foreach ($substrings as $candidate) {
        if (self::str_iends_with($str, $candidate)) {
            return true;
        }
    }

    return false;
}
|
6096
|
|
|
|
|
6097
|
|
|
/**
 * Inserts $substring into the string at the $index provided.
 *
 * INFO: The string is returned unchanged if $index is greater than its length.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>String to be inserted.</p>
 * @param int    $index     <p>The index (in characters) at which to insert the substring.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_insert(
    string $str,
    string $substring,
    int $index,
    string $encoding = 'UTF-8'
): string {
    if ($encoding !== 'UTF-8') {
        // generic path: use the encoding-aware helpers of this class
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }

    // fast path: call "mb_*" directly for UTF-8
    $length = (int) \mb_strlen($str);
    if ($index > $length) {
        return $str;
    }

    $head = (string) \mb_substr($str, 0, $index);
    $tail = (string) \mb_substr($str, $index, $length);

    return $head . $substring . $tail;
}
|
6138
|
|
|
|
|
6139
|
|
|
/**
 * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
 *
 * INFO: The replacement is always inserted literally: "$1", "\1" or "\\" in the
 *       replacement are NOT interpreted as regex back-references / escapes.
 *
 * EXAMPLE: <code>
 * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
 * </code>
 *
 * @see http://php.net/manual/en/function.str-ireplace.php
 *
 * @param string|string[] $search      <p>
 *                                     Every replacement with search array is
 *                                     performed on the result of previous replacement.
 *                                     An empty search string never matches.
 *                                     </p>
 * @param string|string[] $replacement <p>The replacement.</p>
 * @param string|string[] $subject     <p>
 *                                     If subject is an array, then the search and
 *                                     replace is performed with every entry of
 *                                     subject, and the return value is an array as
 *                                     well.
 *                                     </p>
 * @param int             $count       [optional] <p>
 *                                     The number of matched and replaced needles will
 *                                     be returned in count which is passed by
 *                                     reference.
 *                                     </p>
 *
 * @psalm-pure
 *
 * @return string|string[]
 *                         <p>A string or an array of replacements.</p>
 *
 * @template TStrIReplaceSubject
 * @phpstan-param TStrIReplaceSubject $subject
 * @phpstan-return TStrIReplaceSubject
 */
public static function str_ireplace($search, $replacement, $subject, &$count = null)
{
    // build one case-insensitive unicode regex per search string
    $patterns = [];
    foreach ((array) $search as $key => $needle) {
        $needle = (string) $needle;

        if ($needle === '') {
            // a pattern that can never match: an empty search string replaces nothing
            $patterns[$key] = '/^(?<=.)$/';
        } else {
            $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
        }
    }

    // "preg_replace()" interprets "\" and "$" in the replacement (back-references),
    // "str_replace()"-like semantics need them as literal characters -> escape them
    $escape_replacement = static function ($value): string {
        return \addcslashes((string) $value, '\\$');
    };

    if (\is_array($replacement)) {
        $replacement = \array_map($escape_replacement, $replacement);
    } else {
        // fallback: "null" is handled like an empty string
        $replacement = $escape_replacement($replacement ?? '');
    }

    // fallback
    /** @phpstan-ignore-next-line - only a fallback for PHP8 */
    if ($subject === null) {
        $subject = '';
    }

    /**
     * @psalm-suppress PossiblyNullArgument
     * @phpstan-var TStrIReplaceSubject $subject
     */
    $subject = \preg_replace($patterns, $replacement, $subject, -1, $count);

    return $subject;
}
|
6206
|
|
|
|
|
6207
|
|
|
/**
 * Replaces $search from the beginning of string with $replacement.
 *
 * - case-insensitive (byte-wise comparison via "strncasecmp()")
 * - an empty $search matches at the very beginning, so $replacement is prepended
 *
 * NOTE(review): "strncasecmp()" presumably only folds the case of ASCII letters,
 *               so e.g. "Ä" vs. "ä" would not match - confirm.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string after the replacement.</p>
 */
public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
{
    if ($search === '') {
        // the empty string is a prefix of every string (also of the empty one)
        return $replacement . $str;
    }

    $search_length = \strlen($search);

    // only compare the first "strlen($search)" bytes of the string
    if (\strncasecmp($str, $search, $search_length) === 0) {
        return $replacement . \substr($str, $search_length);
    }

    // no match at the beginning: nothing to replace
    return $str;
}
|
6242
|
|
|
|
|
6243
|
|
|
/**
 * Replaces $search from the ending of string with $replacement.
 *
 * - case-insensitive (byte-wise search via "stripos()")
 * - an empty $search matches at the very end, so $replacement is appended
 * - a $search that is longer than $str can never match, the string is returned unchanged
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string after the replacement.</p>
 */
public static function str_ireplace_ending(string $str, string $search, string $replacement): string
{
    if ($search === '') {
        // the empty string is a suffix of every string (also of the empty one)
        return $str . $replacement;
    }

    // byte-offset where $search has to start to be a suffix of $str
    $suffix_start = \strlen($str) - \strlen($search);

    // a negative offset must not be passed to "stripos()": it would be counted from the
    // end of the string and raises a "ValueError" (PHP >= 8) if it is outside of the string
    if ($suffix_start < 0) {
        return $str;
    }

    // searching from "$suffix_start" can only find a match exactly at that position
    if (\stripos($str, $search, $suffix_start) !== false) {
        return \substr($str, 0, $suffix_start) . $replacement;
    }

    return $str;
}
|
6277
|
|
|
|
|
6278
|
|
|
/**
 * Check if the string starts with the given substring, case-insensitive.
 *
 * EXAMPLE: <code>
 * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
 * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Always true for an empty $needle, false for an empty $haystack with a non-empty $needle.</p>
 */
public static function str_istarts_with(string $haystack, string $needle): bool
{
    // every string starts with the empty string
    if ($needle === '') {
        return true;
    }

    // a match is only a prefix if it was found at position 0
    return $haystack !== ''
           &&
           self::stripos($haystack, $needle) === 0;
}
|
6305
|
|
|
|
|
6306
|
|
|
/**
 * Returns true if the string begins with any of $substrings, false otherwise.
 *
 * - case-insensitive
 * - an empty $str never matches (not even an empty substring)
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $substrings <p>Substrings to look for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str starts with at least one of the $substrings.</p>
 */
public static function str_istarts_with_any(string $str, array $substrings): bool
{
    if ($str === '' || $substrings === []) {
        return false;
    }

    foreach ($substrings as $candidate) {
        if (self::str_istarts_with($str, $candidate)) {
            return true;
        }
    }

    return false;
}
|
6337
|
|
|
|
|
6338
|
|
|
/**
 * Gets the substring after the first occurrence of a separator.
 *
 * - the separator is searched case-insensitive
 * - an empty string is returned if $str or $separator is empty, or if the separator was not found
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_after_first_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($separator === '' || $str === '') {
        return '';
    }

    // the offset must be computed in the same encoding that is used to cut the string below
    // NOTE(review): assumes "self::stripos()" accepts ($haystack, $needle, $offset, $encoding)
    //               like "self::strripos()" does - confirm
    $offset = self::stripos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        // fast path: skip the separator itself and return the rest
        return (string) \mb_substr(
            $str,
            $offset + (int) \mb_strlen($separator)
        );
    }

    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
|
6377
|
|
|
|
|
6378
|
|
|
/**
 * Gets the substring after the last occurrence of a separator.
 *
 * - the separator is searched case-insensitive
 * - an empty string is returned if $str or $separator is empty, or if the separator was not found
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_after_last_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($separator === '' || $str === '') {
        return '';
    }

    // the offset must be computed in the same encoding that is used to cut the string below
    $offset = self::strripos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        // fast path: skip the separator itself and return the rest
        return (string) \mb_substr(
            $str,
            $offset + (int) \mb_strlen($separator)
        );
    }

    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
|
6417
|
|
|
|
|
6418
|
|
|
/**
 * Gets the substring before the first occurrence of a separator.
 *
 * - the separator is searched case-insensitive
 * - an empty string is returned if $str or $separator is empty, or if the separator was not found
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_before_first_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($separator === '' || $str === '') {
        return '';
    }

    // the offset must be computed in the same encoding that is used to cut the string below
    // NOTE(review): assumes "self::stripos()" accepts ($haystack, $needle, $offset, $encoding)
    //               like "self::strripos()" does - confirm
    $offset = self::stripos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr($str, 0, $offset);
    }

    return (string) self::substr($str, 0, $offset, $encoding);
}
|
6449
|
|
|
|
|
6450
|
|
|
/**
 * Gets the substring before the last occurrence of a separator.
 *
 * - the separator is searched case-insensitive
 * - an empty string is returned if $str or $separator is empty, or if the separator was not found
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_before_last_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $separator === '') {
        return '';
    }

    if ($encoding !== 'UTF-8') {
        // generic path: use the encoding-aware helpers of this class
        $position = self::strripos($str, $separator, 0, $encoding);

        return $position === false
            ? ''
            : (string) self::substr($str, 0, $position, $encoding);
    }

    // fast path: call "mb_*" directly for UTF-8
    $position = \mb_strripos($str, $separator);

    return $position === false
        ? ''
        : (string) \mb_substr($str, 0, $position);
}
|
6486
|
|
|
|
|
6487
|
|
|
/**
 * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
 *
 * INFO: This is a wrapper for "UTF8::stristr()" that returns an empty string
 *       instead of false, also for an empty $str or $needle.
 *
 * @param string $str           <p>The input string.</p>
 * @param string $needle        <p>The string to look for.</p>
 * @param bool   $before_needle [optional] <p>Default: false</p>
 * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_first(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $needle === '') {
        return '';
    }

    $found = self::stristr($str, $needle, $before_needle, $encoding);

    // "false" (= not found) is mapped to an empty string
    return $found === false ? '' : $found;
}
|
6525
|
|
|
|
|
6526
|
|
|
/**
 * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
 *
 * INFO: This is a wrapper for "UTF8::strrichr()" that returns an empty string
 *       instead of false, also for an empty $str or $needle.
 *
 * @param string $str           <p>The input string.</p>
 * @param string $needle        <p>The string to look for.</p>
 * @param bool   $before_needle [optional] <p>Default: false</p>
 * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_isubstr_last(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $needle === '') {
        return '';
    }

    $found = self::strrichr($str, $needle, $before_needle, $encoding);

    // "false" (= not found) is mapped to an empty string
    return $found === false ? '' : $found;
}
|
6564
|
|
|
|
|
6565
|
|
|
/**
 * Returns the last $n characters of the string.
 *
 * INFO: An empty string is returned for an empty $str or if $n is not positive.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $n        <p>Number of characters to retrieve from the end.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_last_char(
    string $str,
    int $n = 1,
    string $encoding = 'UTF-8'
): string {
    if ($n <= 0 || $str === '') {
        return '';
    }

    if ($encoding !== 'UTF-8') {
        // generic path: use the encoding-aware helper of this class
        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, -$n, null, $normalized_encoding);
    }

    // fast path: a negative start counts the characters from the end
    return (string) \mb_substr($str, -$n);
}
|
6593
|
|
|
|
|
6594
|
|
|
/**
 * Limit the number of characters in a string.
 *
 * - a string that already fits into $length is returned unchanged (without $str_add_on)
 * - otherwise the string is cut so that the result, including $str_add_on, has $length characters
 * - if $str_add_on alone is longer than $length, only $str_add_on is returned
 *
 * @param string $str        <p>The input string.</p>
 * @param int    $length     [optional] <p>Default: 100</p>
 * @param string $str_add_on [optional] <p>Default: …</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_limit(
    string $str,
    int $length = 100,
    string $str_add_on = '…',
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $length <= 0) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // never pass a negative length to "mb_substr()": it would cut from the
        // end of the string and return (nearly) the whole string
        $keep = \max(0, $length - (int) \mb_strlen($str_add_on));

        return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ((int) self::strlen($str, $encoding) <= $length) {
        return $str;
    }

    // the add-on is measured in the same encoding as the string itself
    $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

    return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
}
|
6633
|
|
|
|
|
6634
|
|
|
/**
 * Limit the number of characters in a string, but also after the next word.
 *
 * - a string that already fits into $length is returned unchanged (without $str_add_on)
 * - otherwise the string is cut at $length and the last (incomplete) word is dropped
 * - if the cut part contains no space at all, it is cut hard at "$length - 1" characters
 *
 * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
 *
 * @param string $str        <p>The input string.</p>
 * @param int    $length     [optional] <p>Default: 100</p>
 * @param string $str_add_on [optional] <p>Default: …</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_limit_after_word(
    string $str,
    int $length = 100,
    string $str_add_on = '…',
    string $encoding = 'UTF-8'
): string {
    if ($length <= 0 || $str === '') {
        return '';
    }

    if ($encoding === 'UTF-8') {
        // fast path: call "mb_*" directly for UTF-8

        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // the limit ends exactly behind a word: only drop the space
        if (\mb_substr($str, $length - 1, 1) === ' ') {
            return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
        }

        $cut = \mb_substr($str, 0, $length);

        // a negative limit drops the last element, that is the incomplete word
        $complete_words = \implode(' ', \explode(' ', $cut, -1));
        if ($complete_words !== '') {
            return $complete_words . $str_add_on;
        }

        // no complete word available: hard cut
        return ((string) \mb_substr($cut, 0, $length - 1)) . $str_add_on;
    }

    // generic path: use the encoding-aware helpers of this class

    if ((int) self::strlen($str, $encoding) <= $length) {
        return $str;
    }

    // the limit ends exactly behind a word: only drop the space
    if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
        return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
    }

    $cut = self::substr($str, 0, $length, $encoding);
    if ($cut === false) {
        return '' . $str_add_on;
    }

    // a negative limit drops the last element, that is the incomplete word
    $complete_words = \implode(' ', \explode(' ', $cut, -1));
    if ($complete_words !== '') {
        return $complete_words . $str_add_on;
    }

    // no complete word available: hard cut
    return ((string) self::substr($cut, 0, $length - 1, $encoding)) . $str_add_on;
}
|
6700
|
|
|
|
|
6701
|
|
|
/**
 * Returns the longest common prefix between the $str1 and $str2.
 *
 * INFO: The strings are compared character by character (case-sensitive),
 *       the comparison stops at the first difference.
 *
 * @param string $str1     <p>The input string.</p>
 * @param string $str2     <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_longest_common_prefix(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): string {
    // init
    $prefix = '';
    $position = 0;

    if ($encoding !== 'UTF-8') {
        // generic path: use the encoding-aware helpers of this class
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // the prefix can't be longer than the shorter string
        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        while ($position < $limit) {
            $char = self::substr($str1, $position, 1, $encoding);

            if (
                $char === false
                ||
                $char !== self::substr($str2, $position, 1, $encoding)
            ) {
                break;
            }

            $prefix .= $char;
            ++$position;
        }

        return $prefix;
    }

    // fast path: call "mb_*" directly for UTF-8

    // the prefix can't be longer than the shorter string
    $limit = (int) \min(
        \mb_strlen($str1),
        \mb_strlen($str2)
    );

    while ($position < $limit) {
        $char = \mb_substr($str1, $position, 1);

        if (
            $char === false
            ||
            $char !== \mb_substr($str2, $position, 1)
        ) {
            break;
        }

        $prefix .= $char;
        ++$position;
    }

    return $prefix;
}
|
6764
|
|
|
|
|
6765
|
|
|
/**
 * Returns the longest common substring between the $str1 and $str2.
 * In the case of ties, it returns that which occurs first in $str1.
 *
 * Uses dynamic programming, see
 * http://en.wikipedia.org/wiki/Longest_common_substring_problem
 *
 * @param string $str1     <p>The input string.</p>
 * @param string $str2     <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The longest common substring, or an empty string if there is none.</p>
 */
public static function str_longest_common_substring(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): string {
    if ($str1 === '' || $str2 === '') {
        return '';
    }

    if ($encoding === 'UTF-8') {
        $str_length = (int) \mb_strlen($str1);
        $other_length = (int) \mb_strlen($str2);
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $str_length = (int) self::strlen($str1, $encoding);
        $other_length = (int) self::strlen($str2, $encoding);
    }

    // Return if either string is empty
    if ($str_length === 0 || $other_length === 0) {
        return '';
    }

    // Re-checked on purpose: the encoding may have been normalized to "UTF-8" above,
    // in which case the native "mb_" functions are used for the character access.
    $use_mb = $encoding === 'UTF-8';

    // Extract the characters of $str2 once, instead of once per character of $str1.
    $other_chars = [];
    for ($j = 0; $j < $other_length; ++$j) {
        $other_chars[$j] = $use_mb
            ? \mb_substr($str2, $j, 1)
            : self::substr($str2, $j, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // 1-based end position of that match in $str1

    // $table[$i][$j] = length of the common run ending at $str1[$i - 1] and $str2[$j - 1];
    // the table starts zero-filled, so only matching cells need to be written.
    $table = \array_fill(
        0,
        $str_length + 1,
        \array_fill(0, $other_length + 1, 0)
    );

    for ($i = 1; $i <= $str_length; ++$i) {
        // The character of $str1 does not change inside the inner loop.
        $str_char = $use_mb
            ? \mb_substr($str1, $i - 1, 1)
            : self::substr($str1, $i - 1, 1, $encoding);

        for ($j = 1; $j <= $other_length; ++$j) {
            if ($str_char !== $other_chars[$j - 1]) {
                continue;
            }

            $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

            // Strictly greater: on ties the first match in $str1 wins.
            if ($table[$i][$j] > $len) {
                $len = $table[$i][$j];
                $end = $i;
            }
        }
    }

    if ($use_mb) {
        return (string) \mb_substr($str1, $end - $len, $len);
    }

    return (string) self::substr($str1, $end - $len, $len, $encoding);
}
|
6855
|
|
|
|
|
6856
|
|
|
/**
 * Returns the longest common suffix shared by $str1 and $str2.
 *
 * Both strings are compared character by character from the end; the
 * comparison stops at the first position where the characters differ.
 *
 * @param string $str1     <p>The input string.</p>
 * @param string $str2     <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The shared trailing characters, or an empty string if there are none.</p>
 */
public static function str_longest_common_suffix(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): string {
    if ($str1 === '' || $str2 === '') {
        return '';
    }

    $suffix = '';

    if ($encoding === 'UTF-8') {
        // Fast path: call the native "mb_" functions directly.
        $limit = (int) \min(\mb_strlen($str1, $encoding), \mb_strlen($str2, $encoding));

        // A negative offset addresses characters counted from the end of the string.
        for ($back = 1; $back <= $limit; ++$back) {
            $current = \mb_substr($str1, -$back, 1);
            if ($current === false || $current !== \mb_substr($str2, -$back, 1)) {
                return $suffix;
            }

            $suffix = $current . $suffix;
        }

        return $suffix;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');
    $limit = (int) \min(self::strlen($str1, $encoding), self::strlen($str2, $encoding));

    for ($back = 1; $back <= $limit; ++$back) {
        $current = self::substr($str1, -$back, 1, $encoding);
        if ($current === false || $current !== self::substr($str2, -$back, 1, $encoding)) {
            return $suffix;
        }

        $suffix = $current . $suffix;
    }

    return $suffix;
}
|
6922
|
|
|
|
|
6923
|
|
|
/**
 * Returns true if $str matches the supplied pattern, false otherwise.
 *
 * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier,
 * so a literal "/" inside $pattern has to be escaped by the caller.
 *
 * @param string $str     <p>The input string.</p>
 * @param string $pattern <p>Regex pattern to match against (without delimiters).</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not $str matches the pattern; also false if "preg_match()" fails.</p>
 */
public static function str_matches_pattern(string $str, string $pattern): bool
{
    // "preg_match()" yields 1 on a match, 0 on no match and false on failure.
    return \preg_match('/' . $pattern . '/u', $str) === 1;
}
|
6938
|
|
|
|
|
6939
|
|
|
/**
 * Returns whether or not a character exists at an index. Offsets may be
 * negative to count from the last character in the string. Implements
 * part of the ArrayAccess interface.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $offset   <p>The index to check.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>Whether or not the index exists.</p>
 */
public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
{
    $char_count = (int) self::strlen($str, $encoding);

    // Non-negative offsets are zero-based, negative offsets start at -1 for the last character.
    return $offset >= 0
        ? $char_count > $offset
        : $char_count >= \abs($offset);
}
|
6964
|
|
|
|
|
6965
|
|
|
/**
 * Returns the character at the given index. Offsets may be negative to
 * count from the last character in the string. Implements part of the
 * ArrayAccess interface, and throws an OutOfBoundsException if the index
 * does not exist.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @throws \OutOfBoundsException if the positive or negative offset does not exist
 *
 * @return string
 *                <p>The character at the specified index.</p>
 *
 * @psalm-pure
 */
public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
{
    // Measure the string in the same encoding that is used for the lookup below,
    // otherwise the bounds check and "char_at()" could disagree about the length.
    $length = (int) self::strlen($str, $encoding);

    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
        throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
}
|
6997
|
|
|
|
|
6998
|
|
|
/**
 * Pad a UTF-8 string to a given length with another string.
 *
 * The input is returned unchanged if $pad_length is 0, if $pad_string is empty
 * or if $pad_length is smaller than the current length of $str.
 * With STR_PAD_BOTH the left side receives floor(diff / 2) and the right side
 * ceil(diff / 2) padding characters.
 *
 * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
 *
 * @param string     $str        <p>The input string.</p>
 * @param int        $pad_length <p>The length of return string.</p>
 * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
 * @param int|string $pad_type   [optional] <p>
 *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
 *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
 *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
 *                               </p>
 * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
 *
 * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right' or 'both'
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Returns the padded string.</p>
 */
public static function str_pad(
    string $str,
    int $pad_length,
    string $pad_string = ' ',
    $pad_type = \STR_PAD_RIGHT,
    string $encoding = 'UTF-8'
): string {
    if ($pad_length === 0 || $pad_string === '') {
        return $str;
    }

    // Anything that is not already an integer must be one of the string aliases.
    if ($pad_type !== (int) $pad_type) {
        $aliases = [
            'left'  => \STR_PAD_LEFT,
            'right' => \STR_PAD_RIGHT,
            'both'  => \STR_PAD_BOTH,
        ];

        if (!\is_string($pad_type) || !isset($aliases[$pad_type])) {
            throw new \InvalidArgumentException(
                'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
            );
        }

        $pad_type = $aliases[$pad_type];
    }

    if ($encoding === 'UTF-8') {
        // Fast path: call the native "mb_" functions directly.
        $str_length = (int) \mb_strlen($str);
        if ($pad_length < $str_length) {
            return $str;
        }

        $diff = $pad_length - $str_length;

        if ($pad_type === \STR_PAD_LEFT) {
            $ps_length = (int) \mb_strlen($pad_string);

            // Repeat the pad string often enough, then cut it to the exact size.
            $pre = (string) \mb_substr(
                \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
                0,
                $diff
            );
            $post = '';
        } elseif ($pad_type === \STR_PAD_BOTH) {
            $ps_length_left = (int) \floor($diff / 2);
            $ps_length_right = (int) \ceil($diff / 2);

            $pre = (string) \mb_substr(
                \str_repeat($pad_string, $ps_length_left),
                0,
                $ps_length_left
            );
            $post = (string) \mb_substr(
                \str_repeat($pad_string, $ps_length_right),
                0,
                $ps_length_right
            );
        } else {
            // STR_PAD_RIGHT and every other integer value
            $ps_length = (int) \mb_strlen($pad_string);

            $pre = '';
            $post = (string) \mb_substr(
                \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
                0,
                $diff
            );
        }

        return $pre . $str . $post;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    $str_length = (int) self::strlen($str, $encoding);
    if ($pad_length < $str_length) {
        return $str;
    }

    $diff = $pad_length - $str_length;

    if ($pad_type === \STR_PAD_LEFT) {
        $ps_length = (int) self::strlen($pad_string, $encoding);

        $pre = (string) self::substr(
            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
            0,
            $diff,
            $encoding
        );
        $post = '';
    } elseif ($pad_type === \STR_PAD_BOTH) {
        $ps_length_left = (int) \floor($diff / 2);
        $ps_length_right = (int) \ceil($diff / 2);

        $pre = (string) self::substr(
            \str_repeat($pad_string, $ps_length_left),
            0,
            $ps_length_left,
            $encoding
        );
        $post = (string) self::substr(
            \str_repeat($pad_string, $ps_length_right),
            0,
            $ps_length_right,
            $encoding
        );
    } else {
        // STR_PAD_RIGHT and every other integer value
        $ps_length = (int) self::strlen($pad_string, $encoding);

        $pre = '';
        $post = (string) self::substr(
            \str_repeat($pad_string, (int) \ceil($diff / $ps_length)),
            0,
            $diff,
            $encoding
        );
    }

    return $pre . $str . $post;
}
|
7165
|
|
|
|
|
7166
|
|
|
/**
 * Pads both sides of the string up to the given length.
 * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_BOTH.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with padding applied.</p>
 */
public static function str_pad_both(
    string $str,
    int $length,
    string $pad_str = ' ',
    string $encoding = 'UTF-8'
): string {
    return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
}
|
7194
|
|
|
|
|
7195
|
|
|
/**
 * Pads the beginning of the string up to the given length.
 * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_LEFT.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with left padding.</p>
 */
public static function str_pad_left(
    string $str,
    int $length,
    string $pad_str = ' ',
    string $encoding = 'UTF-8'
): string {
    return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
}
|
7223
|
|
|
|
|
7224
|
|
|
/**
 * Pads the end of the string up to the given length.
 * Alias for "UTF8::str_pad()" with a $pad_type of STR_PAD_RIGHT.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with right padding.</p>
 */
public static function str_pad_right(
    string $str,
    int $length,
    string $pad_str = ' ',
    string $encoding = 'UTF-8'
): string {
    return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
}
|
7252
|
|
|
|
|
7253
|
|
|
/**
 * Repeat a string.
 *
 * The input is passed through "UTF8::filter()" before it is repeated.
 *
 * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
 *
 * @param string $str        <p>
 *                           The string to be repeated.
 *                           </p>
 * @param int    $multiplier <p>
 *                           Number of times the input string should be
 *                           repeated.
 *                           </p>
 *                           <p>
 *                           multiplier has to be greater than or equal to 0.
 *                           If the multiplier is set to 0, the function
 *                           will return an empty string.
 *                           </p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The repeated string.</p>
 */
public static function str_repeat(string $str, int $multiplier): string
{
    return \str_repeat(self::filter($str), $multiplier);
}
|
7282
|
|
|
|
|
7283
|
|
|
/**
 * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
 *
 * Replace all occurrences of the search string with the replacement string
 *
 * @see http://php.net/manual/en/function.str-replace.php
 *
 * @param string|string[] $search  <p>
 *                                 The value being searched for, otherwise known as the needle.
 *                                 An array may be used to designate multiple needles.
 *                                 </p>
 * @param string|string[] $replace <p>
 *                                 The replacement value that replaces found search
 *                                 values. An array may be used to designate multiple replacements.
 *                                 </p>
 * @param string|string[] $subject <p>
 *                                 The string or array of strings being searched and replaced on,
 *                                 otherwise known as the haystack.
 *                                 </p>
 *                                 <p>
 *                                 If subject is an array, then the search and
 *                                 replace is performed with every entry of
 *                                 subject, and the return value is an array as
 *                                 well.
 *                                 </p>
 * @param int|null        $count   [optional] <p>
 *                                 If passed, this will hold the number of matched and replaced needles.
 *                                 </p>
 *
 * @psalm-pure
 *
 * @return string|string[]
 *                         <p>This function returns a string or an array with the replaced values.</p>
 *
 * @template TStrReplaceSubject
 * @phpstan-param TStrReplaceSubject $subject
 * @phpstan-return TStrReplaceSubject
 *
 * @deprecated please use \str_replace() instead
 */
public static function str_replace(
    $search,
    $replace,
    $subject,
    int &$count = null
) {
    /**
     * @psalm-suppress PossiblyNullArgument
     * @phpstan-var TStrReplaceSubject $result
     */
    $result = \str_replace($search, $replace, $subject, $count);

    return $result;
}
|
7342
|
|
|
|
|
7343
|
|
|
/**
 * Replaces $search from the beginning of string with $replacement.
 *
 * The comparison is byte-wise: the replacement only happens if $str starts
 * with exactly the bytes of $search, otherwise $str is returned unchanged.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string after the replacements.</p>
 */
public static function str_replace_beginning(
    string $str,
    string $search,
    string $replacement
): string {
    if ($search === '') {
        // NOTE(review): an empty search term appends the replacement to the end of the
        // string, although this method works on the beginning - confirm this is intended.
        return $str . $replacement;
    }

    $search_bytes = \strlen($search);
    if (\strncmp($str, $search, $search_bytes) !== 0) {
        return $str;
    }

    return $replacement . \substr($str, $search_bytes);
}
|
7381
|
|
|
|
|
7382
|
|
|
/**
 * Replaces $search from the ending of string with $replacement.
 *
 * The comparison is byte-wise: the replacement only happens if $str ends
 * with exactly the bytes of $search, otherwise $str is returned unchanged.
 * An empty $search appends $replacement to $str.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string after the replacements.</p>
 */
public static function str_replace_ending(
    string $str,
    string $search,
    string $replacement
): string {
    if ($search === '') {
        return $str . $replacement;
    }

    $search_bytes = \strlen($search);

    // A search term longer than the string can never be its ending. Checking the length
    // first avoids an out-of-range offset, which "strpos()" rejects (ValueError in PHP 8).
    if (
        $search_bytes > \strlen($str)
        ||
        \substr($str, -$search_bytes) !== $search
    ) {
        return $str;
    }

    return \substr($str, 0, -$search_bytes) . $replacement;
}
|
7419
|
|
|
|
|
7420
|
|
|
/**
 * Replace the first "$search"-term with the "$replace"-term.
 *
 * If "$search" does not occur in "$subject", the subject is returned unchanged.
 *
 * @param string $search  <p>The string to search for.</p>
 * @param string $replace <p>The replacement.</p>
 * @param string $subject <p>The string being searched and replaced on.</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @psalm-suppress InvalidReturnType
 */
public static function str_replace_first(
    string $search,
    string $replace,
    string $subject
): string {
    $first_pos = self::strpos($subject, $search);
    if ($first_pos === false) {
        return $subject;
    }

    /**
     * @psalm-suppress InvalidReturnStatement
     */
    return self::substr_replace($subject, $replace, $first_pos, (int) self::strlen($search));
}
|
7454
|
|
|
|
|
7455
|
|
|
/**
 * Replace the last "$search"-term with the "$replace"-term.
 *
 * If "$search" does not occur in "$subject", the subject is returned unchanged.
 *
 * @param string $search  <p>The string to search for.</p>
 * @param string $replace <p>The replacement.</p>
 * @param string $subject <p>The string being searched and replaced on.</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @psalm-suppress InvalidReturnType
 */
public static function str_replace_last(
    string $search,
    string $replace,
    string $subject
): string {
    $last_pos = self::strrpos($subject, $search);
    if ($last_pos === false) {
        return $subject;
    }

    /**
     * @psalm-suppress InvalidReturnStatement
     */
    return self::substr_replace($subject, $replace, $last_pos, (int) self::strlen($search));
}
|
7488
|
|
|
|
|
7489
|
|
|
/**
 * Shuffles all the characters in the string.
 *
 * INFO: uses random algorithm which is weak for cryptography purposes
 *
 * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
 *
 * @param string $str      <p>The input string</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 *                <p>The shuffled string.</p>
 */
public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
{
    // Nothing to shuffle; this also avoids building the descending range "0..-1".
    if ($str === '') {
        return '';
    }

    // init
    $shuffled_str = '';

    if ($encoding === 'UTF-8') {
        $indexes = \range(0, (int) \mb_strlen($str) - 1);
        \shuffle($indexes);

        // Collect the characters in the shuffled index order.
        foreach ($indexes as $i) {
            $tmp_sub_str = \mb_substr($str, $i, 1);
            if ($tmp_sub_str !== false) {
                $shuffled_str .= $tmp_sub_str;
            }
        }

        return $shuffled_str;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    $indexes = \range(0, (int) self::strlen($str, $encoding) - 1);
    \shuffle($indexes);

    foreach ($indexes as $i) {
        $tmp_sub_str = self::substr($str, $i, 1, $encoding);
        if ($tmp_sub_str !== false) {
            $shuffled_str .= $tmp_sub_str;
        }
    }

    return $shuffled_str;
}
|
7536
|
|
|
|
|
7537
|
|
|
/**
 * Returns the substring beginning at $start, and up to, but not including
 * the index specified by $end. If $end is omitted, the function extracts
 * the remaining string. If $end is negative, it is computed from the end
 * of the string. A non-negative $end that is not greater than $start
 * yields an empty string.
 *
 * @param string   $str      <p>The input string.</p>
 * @param int      $start    <p>Initial index from which to begin extraction.</p>
 * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
 * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
 *                      characters long, <b>FALSE</b> will be returned.
 */
public static function str_slice(
    string $str,
    int $start,
    int $end = null,
    string $encoding = 'UTF-8'
) {
    if ($encoding === 'UTF-8') {
        // Fast path: call the native "mb_" functions directly.
        if ($end === null) {
            // The full length is always enough to reach the end of the string.
            return \mb_substr($str, $start, (int) \mb_strlen($str));
        }

        if ($end >= 0 && $end <= $start) {
            return '';
        }

        // Translate the end index into a length relative to $start.
        $length = $end < 0
            ? (int) \mb_strlen($str) + $end - $start
            : $end - $start;

        return \mb_substr($str, $start, $length);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ($end === null) {
        return self::substr($str, $start, (int) self::strlen($str, $encoding), $encoding);
    }

    if ($end >= 0 && $end <= $start) {
        return '';
    }

    $length = $end < 0
        ? (int) self::strlen($str, $encoding) + $end - $start
        : $end - $start;

    return self::substr($str, $start, $length, $encoding);
}
|
7588
|
|
|
|
|
7589
|
|
|
/**
 * Convert a string to e.g.: "snake_case"
 *
 * Steps: whitespace is normalized, "-" becomes "_", every uppercase letter is
 * lower-cased and prefixed with "_", every ASCII digit is wrapped in "_",
 * remaining whitespace runs become "_", repeated "_" are collapsed and
 * leading / trailing "_" (and whitespace) are trimmed.
 *
 * @param string $str
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string in snake_case.</p>
 */
public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    $str = \str_replace('-', '_', self::normalize_whitespace($str));

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = (string) \preg_replace_callback(
        // Numbers and uppercase letters only. Inside a character class "|" is a
        // literal pipe, not an alternation, so it must not appear here.
        '/([\\p{N}\\p{Lu}])/u',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            $match = $matches[1];

            // Plain ASCII digits survive the int round trip and are wrapped in "_".
            if ((string) (int) $match === $match) {
                return '_' . $match . '_';
            }

            if ($encoding === 'UTF-8') {
                return '_' . \mb_strtolower($match);
            }

            return '_' . self::strtolower($match, $encoding);
        },
        $str
    );

    // No separate "trim whitespace" pattern is needed: once every whitespace
    // run has been replaced by "_", nothing is left for it to match.
    $str = (string) \preg_replace(
        [
            '/\\s+/u', // convert spaces to "_"
            '/_+/',    // remove double "_"
        ],
        [
            '_',
            '_',
        ],
        $str
    );

    return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
}
|
7658
|
|
|
|
|
7659
|
|
|
/**
 * Sort all characters of a string according to their code points.
 *
 * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
 *
 * @param string $str    <p>A UTF-8 string.</p>
 * @param bool   $unique <p>If <strong>true</strong>, every character is kept only once.</p>
 * @param bool   $desc   <p>If <strong>true</strong>, the characters are sorted in reverse code point order.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string of sorted characters.</p>
 */
public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
{
    /** @var int[] $code_points */
    $code_points = self::codepoints($str);

    if ($unique) {
        // Flipping twice removes duplicate values.
        $code_points = \array_flip(\array_flip($code_points));
    }

    if (!$desc) {
        \asort($code_points);
    } else {
        \arsort($code_points);
    }

    return self::string($code_points);
}
|
7690
|
|
|
|
|
7691
|
|
|
/**
 * Split every element of an array into an array of Unicode characters
 * (or chunks of $length characters). Array keys are preserved.
 *
 * EXAMPLE: <code>
 * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
 * </code>
 *
 * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
 * @param int            $length                  [optional] <p>Max character length of each array
 *                                                element.</p>
 * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
 *                                                string.</p>
 * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
 *                                                "mb_substr"</p>
 *
 * @psalm-pure
 *
 * @return string[][]
 *                    <p>An array containing chunks of the input.</p>
 */
public static function str_split_array(
    array $input,
    int $length = 1,
    bool $clean_utf8 = false,
    bool $try_to_use_mb_functions = true
): array {
    /** @var string[][] $chunks */
    $chunks = \array_map(
        static function ($value) use ($length, $clean_utf8, $try_to_use_mb_functions): array {
            return self::str_split(
                $value,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        },
        $input
    );

    return $chunks;
}
|
7729
|
|
|
|
|
7730
|
|
|
/**
 * Convert a string to an array of unicode characters
 * (or chunks of $length characters).
 *
 * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
 *
 * Strategy, in this order: native "mb_str_split" (if mbstring may be used and
 * the function exists), "mb_substr" / a UTF-8 regex, a UTF-8 regex alone, and
 * finally a manual scan over the bytes.
 *
 * @param int|string $input                   <p>The string or int to split into array.</p>
 * @param int        $length                  [optional] <p>Max character length of each array
 *                                            element.</p>
 * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
 *                                            string.</p>
 * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
 *                                            "mb_substr"</p>
 *
 * @psalm-pure
 *
 * @return string[]
 *                  <p>An array containing chunks of chars from the input;
 *                  empty for an empty input or a $length below 1.</p>
 */
public static function str_split(
    $input,
    int $length = 1,
    bool $clean_utf8 = false,
    bool $try_to_use_mb_functions = true
): array {
    if ($length <= 0) {
        return [];
    }

    // this is only an old fallback: arrays are handed over to str_split_array()
    /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
    /** @var int|int[]|string|string[] $input */
    $input = $input;
    if (\is_array($input)) {
        /** @psalm-suppress InvalidReturnStatement */
        /** @phpstan-ignore-next-line - old code :/ */
        return self::str_split_array($input, $length, $clean_utf8, $try_to_use_mb_functions);
    }

    $text = (string) $input;
    if ($text === '') {
        return [];
    }

    if ($clean_utf8) {
        $text = self::clean($text);
    }

    if ($try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true) {
        if (\function_exists('mb_str_split')) {
            /**
             * @psalm-suppress ImpureFunctionCall - why?
             */
            $native = \mb_str_split($text, $length);
            if ($native !== false) {
                return $native;
            }
        }

        $char_count = \mb_strlen($text);
        if ($char_count <= 127) {
            // short input: pick the characters one by one
            $chars = [];
            for ($pos = 0; $pos < $char_count; ++$pos) {
                $chars[] = \mb_substr($text, $pos, 1);
            }
        } else {
            $found = [];
            \preg_match_all('/./us', $text, $found);
            $chars = $found[0] ?? [];
        }
    } elseif (self::$SUPPORT['pcre_utf8'] === true) {
        $found = [];
        \preg_match_all('/./us', $text, $found);
        $chars = $found[0] ?? [];
    } else {
        // fallback: walk over the bytes and group them by the UTF-8 lead byte;
        // bytes that do not form a complete sequence are dropped

        $chars = [];
        $byte_count = \strlen($text);
        $pos = 0;

        while ($pos < $byte_count) {
            $lead = $text[$pos];

            if (($lead & "\x80") === "\x00") {
                // single byte
                $chars[] = $lead;
            } elseif (
                isset($text[$pos + 1])
                &&
                ($lead & "\xE0") === "\xC0"
            ) {
                // two byte sequence
                if (($text[$pos + 1] & "\xC0") === "\x80") {
                    $chars[] = $lead . $text[$pos + 1];

                    ++$pos;
                }
            } elseif (
                isset($text[$pos + 2])
                &&
                ($lead & "\xF0") === "\xE0"
            ) {
                // three byte sequence
                if (
                    ($text[$pos + 1] & "\xC0") === "\x80"
                    &&
                    ($text[$pos + 2] & "\xC0") === "\x80"
                ) {
                    $chars[] = $lead . $text[$pos + 1] . $text[$pos + 2];

                    $pos += 2;
                }
            } elseif (
                isset($text[$pos + 3])
                &&
                ($lead & "\xF8") === "\xF0"
            ) {
                // four byte sequence
                if (
                    ($text[$pos + 1] & "\xC0") === "\x80"
                    &&
                    ($text[$pos + 2] & "\xC0") === "\x80"
                    &&
                    ($text[$pos + 3] & "\xC0") === "\x80"
                ) {
                    $chars[] = $lead . $text[$pos + 1] . $text[$pos + 2] . $text[$pos + 3];

                    $pos += 3;
                }
            }

            ++$pos;
        }
    }

    if ($length > 1) {
        // glue the single characters together into chunks of $length
        return \array_map(
            static function (array $chunk): string {
                return \implode('', $chunk);
            },
            \array_chunk($chars, $length)
        );
    }

    if (isset($chars[0]) && $chars[0] === '') {
        return [];
    }

    return $chars;
}
|
7883
|
|
|
|
|
7884
|
|
|
/**
 * Split the string with the provided pattern and return the pieces as an
 * array of strings. An optional integer $limit truncates the result to at
 * most $limit pieces.
 *
 * NOTE(review): with mbstring the pattern is handed to "mb_split" as a regex,
 * while the fallback passes it through "preg_quote" first (literal match) —
 * confirm whether this difference is intended.
 *
 * @param string $str
 * @param string $pattern <p>The regex with which to split the string.</p>
 * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
 *
 * @psalm-pure
 *
 * @return string[]
 *                  <p>An array of strings.</p>
 */
public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
{
    if ($limit === 0) {
        return [];
    }

    if ($pattern === '') {
        return [$str];
    }

    if (self::$SUPPORT['mbstring'] === true) {
        $parts = \mb_split($pattern, $str);
        if ($parts === false) {
            return [];
        }

        if ($limit < 0) {
            return $parts;
        }

        // keep only the first $limit pieces
        $kept = [];
        foreach ($parts as $part) {
            if (\count($kept) === $limit) {
                break;
            }

            $kept[] = $part;
        }

        return $kept;
    }

    // Ask for one piece more than wanted, so that the unsplit remainder ends
    // up in an extra last element which can be thrown away afterwards.
    $pcre_limit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $pcre_limit);
    if ($parts === false) {
        return [];
    }

    if ($pcre_limit > 0 && \count($parts) === $pcre_limit) {
        \array_pop($parts);
    }

    return $parts;
}
|
7953
|
|
|
|
|
7954
|
|
|
/**
 * Check if the string starts with the given substring (case-sensitive).
 *
 * EXAMPLE: <code>
 * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
 * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
 * </code>
 *
 * An empty $needle always matches; otherwise an empty $haystack never does.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function str_starts_with(string $haystack, string $needle): bool
{
    if ($needle === '') {
        return true;
    }

    if ($haystack === '') {
        return false;
    }

    if (\PHP_VERSION_ID < 80000) {
        // byte-wise comparison of the first strlen($needle) bytes
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }

    /** @phpstan-ignore-next-line - only for PHP8 */
    return \str_starts_with($haystack, $needle);
}
|
7986
|
|
|
|
|
7987
|
|
|
/**
 * Check whether the string begins with at least one of the given substrings.
 *
 * - case-sensitive
 *
 * @param string $str        <p>The input string.</p>
 * @param array  $substrings <p>Substrings to look for.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>True as soon as one substring matches; false for an empty
 *              $str, an empty list or when nothing matches.</p>
 */
public static function str_starts_with_any(string $str, array $substrings): bool
{
    if ($str === '' || $substrings === []) {
        return false;
    }

    foreach ($substrings as $candidate) {
        if (self::str_starts_with($str, $candidate)) {
            return true;
        }
    }

    return false;
}
|
8018
|
|
|
|
|
8019
|
|
|
/**
 * Gets the substring after the first occurrence of a separator.
 *
 * An empty string is returned when $str or $separator is empty, or when the
 * separator does not occur in $str.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($separator === '' || $str === '') {
        return '';
    }

    if ($encoding === 'UTF-8') {
        $offset = \mb_strpos($str, $separator);
        if ($offset === false) {
            return '';
        }

        return (string) \mb_substr(
            $str,
            $offset + (int) \mb_strlen($separator)
        );
    }

    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    // Use the class's own substr() here, exactly like the sibling
    // "str_substr_*_separator" methods do, instead of calling "mb_substr"
    // directly with the caller-supplied encoding name.
    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
|
8060
|
|
|
|
|
8061
|
|
|
/**
 * Gets the substring after the last occurrence of a separator.
 *
 * An empty string is returned when $str or $separator is empty, or when the
 * separator does not occur in $str.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_substr_after_last_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $separator === '') {
        return '';
    }

    $is_utf8 = $encoding === 'UTF-8';

    $position = $is_utf8
        ? \mb_strrpos($str, $separator)
        : self::strrpos($str, $separator, 0, $encoding);

    if ($position === false) {
        return '';
    }

    if ($is_utf8) {
        return (string) \mb_substr($str, $position + (int) \mb_strlen($separator));
    }

    $tail_start = $position + (int) self::strlen($separator, $encoding);

    return (string) self::substr($str, $tail_start, null, $encoding);
}
|
8105
|
|
|
|
|
8106
|
|
|
/**
 * Gets the substring before the first occurrence of a separator.
 *
 * An empty string is returned when $str or $separator is empty, or when the
 * separator does not occur in $str.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_substr_before_first_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $separator === '') {
        return '';
    }

    $is_utf8 = $encoding === 'UTF-8';

    $position = $is_utf8
        ? \mb_strpos($str, $separator)
        : self::strpos($str, $separator, 0, $encoding);

    if ($position === false) {
        return '';
    }

    // everything from the start up to (not including) the separator
    if ($is_utf8) {
        return (string) \mb_substr($str, 0, $position);
    }

    return (string) self::substr($str, 0, $position, $encoding);
}
|
8151
|
|
|
|
|
8152
|
|
|
/**
 * Gets the substring before the last occurrence of a separator.
 *
 * An empty string is returned when $str or $separator is empty, or when the
 * separator does not occur in $str.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $separator === '') {
        return '';
    }

    if ($encoding === 'UTF-8') {
        $position = \mb_strrpos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }

    $position = self::strrpos($str, $separator, 0, $encoding);
    if ($position === false) {
        return '';
    }

    // the encoding name is normalized only after the search, before the cut
    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    return (string) self::substr($str, 0, $position, $encoding);
}
|
8196
|
|
|
|
|
8197
|
|
|
/**
 * Gets the part of the string starting at the first occurrence of "$needle"
 * (needle included) or, with "$before_needle", the part in front of it.
 *
 * An empty string is returned when $str or $needle is empty, or when the
 * needle is not found.
 *
 * @param string $str           <p>The input string.</p>
 * @param string $needle        <p>The string to look for.</p>
 * @param bool   $before_needle [optional] <p>Default: false</p>
 * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_substr_first(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($needle === '' || $str === '') {
        return '';
    }

    // "mb_strstr" defaults its third argument to false, so the flag can be
    // passed through unconditionally.
    $found = $encoding === 'UTF-8'
        ? \mb_strstr($str, $needle, $before_needle)
        : self::strstr($str, $needle, $before_needle, $encoding);

    if ($found === false) {
        return '';
    }

    return $found;
}
|
8243
|
|
|
|
|
8244
|
|
|
/**
 * Gets the part of the string starting at the last occurrence of "$needle"
 * (needle included) or, with "$before_needle", the part in front of it.
 *
 * An empty string is returned when $str or $needle is empty, or when the
 * needle is not found.
 *
 * @param string $str           <p>The input string.</p>
 * @param string $needle        <p>The string to look for.</p>
 * @param bool   $before_needle [optional] <p>Default: false</p>
 * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function str_substr_last(
    string $str,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8'
): string {
    if ($needle === '' || $str === '') {
        return '';
    }

    // "mb_strrchr" defaults its third argument to false, so the flag can be
    // passed through unconditionally.
    $found = $encoding === 'UTF-8'
        ? \mb_strrchr($str, $needle, $before_needle)
        : self::strrchr($str, $needle, $before_needle, $encoding);

    if ($found === false) {
        return '';
    }

    return $found;
}
|
8290
|
|
|
|
|
8291
|
|
|
/**
 * Put the given substring in front of and behind $str.
 *
 * @param string $str
 * @param string $substring <p>The substring to add to both sides.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with the substring both prepended and appended.</p>
 */
public static function str_surround(string $str, string $substring): string
{
    return "{$substring}{$str}{$substring}";
}
|
8306
|
|
|
|
|
8307
|
|
|
/**
 * Returns a (by default trimmed) string with the first letter of each word
 * capitalized and the rest of the word lower-cased.
 * Also accepts an array, $ignore, allowing you to list words not to be
 * capitalized; such words are returned unchanged.
 *
 * @param string              $str
 * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
 *                                                           null. Default: null</p>
 * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
 * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
 *                                                           string.</p>
 * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
 *                                                           el, lt, tr</p>
 * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
 *                                                           e.g. ẞ -> ß</p>
 * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
 *                                                           first</p>
 * @param string|null         $word_define_chars             [optional] <p>A string of chars that, in addition to
 *                                                           whitespace, separate words. Null or an empty
 *                                                           string means: whitespace only.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The titleized string.</p>
 */
public static function str_titleize(
    string $str,
    array $ignore = null,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false,
    bool $use_trim_first = true,
    string $word_define_chars = null
): string {
    if ($str === '') {
        return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($use_trim_first) {
        $str = \trim($str);
    }

    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // The plain "mb_*" functions can only be used when no language specific
    // handling and no length preservation was requested.
    $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

    // Explicit comparison instead of a truthiness check: the string "0" is
    // falsy in PHP but is a perfectly valid separator character.
    if ($word_define_chars !== null && $word_define_chars !== '') {
        $word_define_chars = \preg_quote($word_define_chars, '/');
    } else {
        $word_define_chars = '';
    }

    $str = (string) \preg_replace_callback(
        // a "word" is a run of characters that are neither whitespace nor a separator
        '/([^\\s' . $word_define_chars . ']+)/u',
        static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
            $word = $match[0];

            if ($ignore !== null && \in_array($word, $ignore, true)) {
                return $word;
            }

            if ($use_mb_functions) {
                if ($encoding === 'UTF-8') {
                    return \mb_strtoupper(\mb_substr($word, 0, 1))
                           . \mb_strtolower(\mb_substr($word, 1));
                }

                return \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding)
                       . \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);
            }

            return self::ucfirst(
                self::strtolower(
                    $word,
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                ),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        },
        $str
    );

    return $str;
}
|
8402
|
|
|
|
|
8403
|
|
|
/**
 * Convert a string into an obfuscated string: a randomly chosen share of the
 * characters is replaced by $obfuscateChar.
 *
 * EXAMPLE: <code>
 *
 * UTF8::str_obfuscate('user@example.com', 0.5, '*', ['@', '.']); // e.g. "u**r@e*a**le.c*m"
 * </code>
 *
 * Notes:
 * - Characters listed in $keepChars are never replaced, but a selected
 *   "keep" character still counts towards the share.
 * - U+2603 is used internally as a stand-in for $obfuscateChar characters
 *   that already occur in the input, so any U+2603 contained in $str comes
 *   back as $obfuscateChar.
 * - The positions are picked with "random_int", so the result differs from
 *   call to call.
 *
 * @param string   $str
 * @param float    $percent       <p>Share of the characters to select, e.g. 0.5 for 50 %. Values above 1
 *                                are treated like 1.</p>
 * @param string   $obfuscateChar
 * @param string[] $keepChars
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The obfuscate string.</p>
 */
public static function str_obfuscate(
    string $str,
    float $percent = 0.5,
    string $obfuscateChar = '*',
    array $keepChars = []
): string {
    // protect obfuscation characters that are already part of the input
    $obfuscateCharHelper = "\u{2603}";
    $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

    $chars = self::chars($str);
    $charsMax = \count($chars);
    // Never ask for more selections than there are characters: every position
    // can be selected only once, so a larger target could never be reached
    // and the loop below would not terminate.
    $charsMaxChange = \min($charsMax, \round($charsMax * $percent));
    $charsCounter = 0;
    $charKeyDone = [];

    // keep sweeping over the characters until enough positions were selected
    while ($charsCounter < $charsMaxChange) {
        foreach ($chars as $charKey => $char) {
            if (isset($charKeyDone[$charKey])) {
                continue;
            }

            // skip roughly every second candidate per sweep
            if (\random_int(0, 100) > 50) {
                continue;
            }

            if ($char === $obfuscateChar) {
                continue;
            }

            ++$charsCounter;
            $charKeyDone[$charKey] = true;

            if ($charsCounter > $charsMaxChange) {
                break;
            }

            if (\in_array($char, $keepChars, true)) {
                continue;
            }

            $chars[$charKey] = $obfuscateChar;
        }
    }

    $str = \implode('', $chars);

    // restore the obfuscation characters that were part of the input
    return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
}
|
8469
|
|
|
|
|
8470
|
|
|
/**
 * Returns a trimmed string in proper title case.
 *
 * Also accepts an array, $ignore, allowing you to list words not to be
 * capitalized.
 *
 * Adapted from John Gruber's script.
 *
 * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
 *
 * @param string   $str
 * @param string[] $ignore   <p>An array of words not to capitalize. The entries are joined into the
 *                           regular expressions unescaped, exactly like the built-in small words.</p>
 * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The titleized string.</p>
 */
public static function str_titleize_for_humans(
    string $str,
    array $ignore = [],
    string $encoding = 'UTF-8'
): string {
    if ($str === '') {
        return '';
    }

    // regex fragments (not plain words): some entries carry look-arounds or optional dots
    $small_words = [
        '(?<!q&)a',
        'an',
        'and',
        'as',
        'at(?!&t)',
        'but',
        'by',
        'en',
        'for',
        'if',
        'in',
        'of',
        'on',
        'or',
        'the',
        'to',
        'v[.]?',
        'via',
        'vs[.]?',
    ];

    if ($ignore !== []) {
        $small_words = \array_merge($small_words, $ignore);
    }

    $small_words_rx = \implode('|', $small_words);
    $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

    $str = \trim($str);

    // an input without any lowercase char is lower-cased first, so that the rules below can re-capitalize it
    if (!self::has_lowercase($str)) {
        $str = self::strtolower($str, $encoding);
    }

    // the main substitutions
    $str = (string) \preg_replace_callback(
        '~\\b (_*) (?:                                                          # 1. Leading underscore and
                ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |            # 2. file path or
                  [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' ) # URL, domain, or email
                |
                ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )       # 3. or small word (case-insensitive)
                |
                ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )  # 4. or word w/o internal caps
                |
                ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )  # 5. or some other word
              ) (_*) \\b                                                        # 6. With trailing underscore
            ~ux',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            // preserve leading underscore
            $str = $matches[1];

            // Groups that did not take part in the match are empty strings. Compare against ''
            // explicitly: a truthiness check would treat a matched "0" as "no match" and drop it.
            if ($matches[2] !== '') {
                // preserve URLs, domains, emails and file paths
                $str .= $matches[2];
            } elseif ($matches[3] !== '') {
                // lower-case small words
                $str .= self::strtolower($matches[3], $encoding);
            } elseif ($matches[4] !== '') {
                // capitalize word w/o internal caps
                $str .= self::ucfirst($matches[4], $encoding);
            } else {
                // preserve other kinds of word (iPhone)
                $str .= $matches[5];
            }

            // preserve trailing underscore
            $str .= $matches[6];

            return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string) \preg_replace_callback(
        '~(  \\A [[:punct:]]*            # start of title...
          |  [:.;?!][ ]+                # or of subsentence...
          |  [ ][\'"“‘(\[][ ]*  )       # or of inserted subphrase...
          ( ' . $small_words_rx . ' ) \\b # ...followed by small word
        ~uxi',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            return $matches[1] . self::ucfirst($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string) \preg_replace_callback(
        '~\\b ( ' . $small_words_rx . ' ) # small word...
          (?= [[:punct:]]* \Z          # ...at the end of the title...
          |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
        ~uxi',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            return self::ucfirst($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words.
    // e.g. "in-flight" -> In-Flight
    $str = (string) \preg_replace_callback(
        '~\\b
          (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
          ( ' . $small_words_rx . ' )
          (?= -[[:alpha:]]+)        # lookahead for "-someword"
        ~uxi',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            return self::ucfirst($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind in this pattern excludes "…" although its inline comment
    // talks about a hyphen - confirm which one was intended before touching it.
    $str = (string) \preg_replace_callback(
        '~\\b
          (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
          ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
          ( ' . $small_words_rx . ' ) # ...followed by small word
          (?! - )                   # Negative lookahead for another -
        ~uxi',
        /**
         * @param string[] $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            return $matches[1] . self::ucfirst($matches[2], $encoding);
        },
        $str
    );

    return $str;
}
|
8660
|
|
|
|
|
8661
|
|
|
/**
 * Get a binary representation of a specific string.
 *
 * The bytes of the string are read as one big-endian number and written in base 2,
 * without leading zero bits. An empty string (or a string of NUL bytes only) yields '0'.
 *
 * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>false on error</p>
 */
public static function str_to_binary(string $str)
{
    /** @var array|false $value - needed for PhpStan (stubs error) */
    $value = \unpack('H*', $str);
    if ($value === false) {
        return false;
    }

    // Translate every hex digit into its 4 bits. This is exact for any input length, whereas
    // base_convert() goes through a float and silently loses precision on longer strings.
    /** @noinspection OffsetOperationsInspection */
    $bits = \strtr(
        (string) $value[1],
        [
            '0' => '0000',
            '1' => '0001',
            '2' => '0010',
            '3' => '0011',
            '4' => '0100',
            '5' => '0101',
            '6' => '0110',
            '7' => '0111',
            '8' => '1000',
            '9' => '1001',
            'a' => '1010',
            'b' => '1011',
            'c' => '1100',
            'd' => '1101',
            'e' => '1110',
            'f' => '1111',
        ]
    );

    // keep the historical output format: no leading zeros, but at least one digit
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
}
|
8684
|
|
|
|
|
8685
|
|
|
/**
 * Split a string into its lines.
 *
 * A separator is any run of one or two "\r" / "\n" characters, so "\r\n" counts as
 * one line break (and so does "\n\n").
 *
 * @param string   $str
 * @param bool     $remove_empty_values <p>Remove empty values.</p>
 * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
 *
 * @psalm-pure
 *
 * @return string[]
 */
public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
{
    if ($str === '') {
        return $remove_empty_values ? [] : [''];
    }

    // prefer mbstring when it is available, otherwise fall back to PCRE
    $lines = self::$SUPPORT['mbstring'] === true
        ? \mb_split("[\r\n]{1,2}", $str)
        : \preg_split("/[\r\n]{1,2}/u", $str);

    if ($lines === false) {
        // splitting failed: answer as for an empty input
        return $remove_empty_values ? [] : [''];
    }

    $needs_filtering = $remove_empty_values || $remove_short_values !== null;
    if (!$needs_filtering) {
        return $lines;
    }

    return self::reduce_string_array($lines, $remove_empty_values, $remove_short_values);
}
|
8724
|
|
|
|
|
8725
|
|
|
/**
 * Convert a string into an array of words.
 *
 * The string is split with the words kept as delimiters, so the result alternates between
 * "non-word" parts (even indexes) and words (odd indexes).
 *
 * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
 *
 * @param string   $str
 * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
 * @param bool     $remove_empty_values <p>Remove empty values.</p>
 * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
 *
 * @psalm-pure
 *
 * @return string[]
 */
public static function str_to_words(
    string $str,
    string $char_list = '',
    bool $remove_empty_values = false,
    int $remove_short_values = null
): array {
    if ($str === '') {
        return $remove_empty_values ? [] : [''];
    }

    // from here on $char_list holds the regex character class for "word" chars (letters + extras)
    $char_list = self::rxClass($char_list, '\pL');

    $parts = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
        return $remove_empty_values ? [] : [''];
    }

    if (!$remove_empty_values && $remove_short_values === null) {
        // nothing to filter
        return $parts;
    }

    $reduced = self::reduce_string_array(
        $parts,
        $remove_empty_values,
        $remove_short_values
    );

    // make sure that every entry really is a string
    foreach ($reduced as $key => $value) {
        $reduced[$key] = (string) $value;
    }

    return $reduced;
}
|
8776
|
|
|
|
|
8777
|
|
|
/**
 * Truncates the string to a given length. If $substring is provided, and
 * truncating occurs, the string is further truncated so that the substring
 * may be appended without exceeding the desired length.
 *
 * If $substring alone is already longer than $length, nothing of $str is kept
 * and only $substring is returned.
 *
 * @param string $str
 * @param int    $length    <p>Desired length of the truncated string.</p>
 * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string after truncating.</p>
 */
public static function str_truncate(
    string $str,
    int $length,
    string $substring = '',
    string $encoding = 'UTF-8'
): string {
    if ($str === '') {
        return '';
    }

    // fast path: plain mbstring calls for the default encoding
    if ($encoding === 'UTF-8') {
        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) \mb_strlen($substring);

            // A negative length would make mb_substr() cut from the END of the string,
            // which could return more than the caller asked for.
            if ($length < 0) {
                $length = 0;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        return (string) \mb_substr($str, 0, $length);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ($length >= (int) self::strlen($str, $encoding)) {
        return $str;
    }

    if ($substring !== '') {
        $length -= (int) self::strlen($substring, $encoding);

        // same guard as in the UTF-8 path: never pass a negative length on
        if ($length < 0) {
            $length = 0;
        }
    }

    return (
        (string) self::substr(
            $str,
            0,
            $length,
            $encoding
        )
    ) . $substring;
}
|
8836
|
|
|
|
|
8837
|
|
|
/**
 * Truncates the string to a given length without splitting words.
 *
 * If truncating occurs and $substring is provided, the string is shortened further,
 * so that $substring can be appended without exceeding the desired length.
 * For an empty $str, or when no room is left for any part of $str, only $substring is returned.
 *
 * @param string $str
 * @param int    $length                                 <p>Desired length of the truncated string.</p>
 * @param string $substring                              [optional] <p>The substring to append if it can fit.
 *                                                       Default: ''</p>
 * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
 * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string after truncating.</p>
 */
public static function str_truncate_safe(
    string $str,
    int $length,
    string $substring = '',
    string $encoding = 'UTF-8',
    bool $ignore_do_not_split_words_for_one_word = false
): string {
    if ($str === '' || $length <= 0) {
        return $substring;
    }

    if ($encoding === 'UTF-8') {
        // fast path: plain mbstring calls for the default encoding

        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // reserve the room for the substring
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $cut - needed for PhpStan (stubs error) */
        $cut = \mb_substr($str, 0, $length);
        if ($cut === false) {
            return '';
        }

        // a space directly behind the cut means that no word was split
        $next_space = \mb_strpos($str, ' ', $length - 1);
        if ($next_space === $length) {
            return $cut . $substring;
        }

        // otherwise go back to the last space within the cut
        $last_space = \mb_strrpos($cut, ' ', 0);

        $drop_partial_word = $last_space !== false
                             ||
                             (
                                 $next_space !== false
                                 &&
                                 !$ignore_do_not_split_words_for_one_word
                             );
        if ($drop_partial_word) {
            $cut = (string) \mb_substr($cut, 0, (int) $last_space);
        }

        return $cut . $substring;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ((int) self::strlen($str, $encoding) <= $length) {
        return $str;
    }

    // reserve the room for the substring
    $length -= (int) self::strlen($substring, $encoding);
    if ($length <= 0) {
        return $substring;
    }

    $cut = self::substr($str, 0, $length, $encoding);
    if ($cut === false) {
        return '';
    }

    // a space directly behind the cut means that no word was split
    $next_space = self::strpos($str, ' ', $length - 1, $encoding);
    if ($next_space === $length) {
        return $cut . $substring;
    }

    // otherwise go back to the last space within the cut
    $last_space = self::strrpos($cut, ' ', 0, $encoding);

    $drop_partial_word = $last_space !== false
                         ||
                         (
                             $next_space !== false
                             &&
                             !$ignore_do_not_split_words_for_one_word
                         );
    if ($drop_partial_word) {
        $cut = (string) self::substr($cut, 0, (int) $last_space, $encoding);
    }

    return $cut . $substring;
}
|
8943
|
|
|
|
|
8944
|
|
|
/**
 * Returns a lowercase and trimmed string separated by underscores.
 *
 * Thin wrapper around self::str_delimit() with "_" as the delimiter. Per the documented
 * contract, underscores are inserted before uppercase characters (except for the first
 * character of the string) and in place of spaces and dashes.
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The underscored string.</p>
 */
public static function str_underscored(string $str): string
{
    $underscored = self::str_delimit($str, '_');

    return $underscored;
}
|
8961
|
|
|
|
|
8962
|
|
|
/**
 * Returns an UpperCamelCase version of the supplied string.
 *
 * The string is camelized via self::str_camelize() first, then its first character is
 * upper-cased via self::ucfirst(). Per the documented contract this trims surrounding
 * spaces, capitalizes letters following digits, spaces, dashes and underscores, and
 * removes spaces, dashes, underscores.
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.
 *                                                   Only passed on to the ucfirst() step.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr. Only passed on to the ucfirst() step.</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß. Only passed on to the ucfirst() step.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string in UpperCamelCase.</p>
 */
public static function str_upper_camelize(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    $camelized = self::str_camelize($str, $encoding);

    return self::ucfirst(
        $camelized,
        $encoding,
        $clean_utf8,
        $lang,
        $try_to_keep_the_string_length
    );
}
|
8989
|
|
|
|
|
8990
|
|
|
/**
 * Get the number of words in a specific string.
 *
 * EXAMPLES: <code>
 * // format: 0 -> return only word count (int)
 * //
 * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
 * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
 *
 * // format: 1 -> return words (array)
 * //
 * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
 * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
 *
 * // format: 2 -> return words with offset (array)
 * //
 * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
 * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
 * </code>
 *
 * @param string $str       <p>The input string.</p>
 * @param int    $format    [optional] <p>
 *                          <strong>0</strong> => return a number of words (default)<br>
 *                          <strong>1</strong> => return an array of words<br>
 *                          <strong>2</strong> => return an array of words with word-offset as key
 *                          </p>
 * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
 *
 * @psalm-pure
 *
 * @return int|string[]
 *                      <p>The number of words in the string, or the words themselves (format 1 and 2).</p>
 */
public static function str_word_count(string $str, int $format = 0, string $char_list = '')
{
    // alternating list: separators on the even indexes, words on the odd indexes
    $parts = self::str_to_words($str, $char_list);
    $parts_count = \count($parts);

    if ($format === 1) {
        $words = [];
        for ($index = 1; $index < $parts_count; $index += 2) {
            $words[] = $parts[$index];
        }

        return $words;
    }

    if ($format === 2) {
        $words = [];

        // the first word starts behind the leading separator
        $position = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $parts_count; $index += 2) {
            $words[$position] = $parts[$index];

            // skip the word itself and the separator that follows it
            $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }

    return (int) (($parts_count - 1) / 2);
}
|
9047
|
|
|
|
|
9048
|
|
|
/**
 * Case-insensitive string comparison.
 *
 * INFO: Case-insensitive version of UTF8::strcmp(): both strings are case-folded via
 *       UTF8::strtocasefold() and then compared with UTF8::strcmp().
 *
 * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
 *             <strong>0</strong> if they are equal
 */
public static function strcasecmp(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): int {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
}
|
9090
|
|
|
|
|
9091
|
|
|
/**
 * Case-sensitive string comparison.
 *
 * Both strings are normalized to NFD before they are compared byte-wise. A string that
 * cannot be normalized (e.g. invalid UTF-8) is compared as it is.
 *
 * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
 *
 * @param string $str1 <p>The first string.</p>
 * @param string $str2 <p>The second string.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
 *             <strong>0</strong> if they are equal
 */
public static function strcmp(string $str1, string $str2): int
{
    // identical bytes: no normalization needed
    if ($str1 === $str2) {
        return 0;
    }

    // Normalizer::normalize() returns false on failure; with strict_types enabled,
    // passing that on to \strcmp() would throw a TypeError.
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    if (!\is_string($normalized1)) {
        $normalized1 = $str1;
    }

    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);
    if (!\is_string($normalized2)) {
        $normalized2 = $str2;
    }

    return \strcmp($normalized1, $normalized2);
}
|
9119
|
|
|
|
|
9120
|
|
|
/**
 * Find length of initial segment not matching mask.
 *
 * Counts the characters at the start of $str (or of the slice selected via $offset and
 * $length) up to the first character that is contained in $char_list.
 *
 * @param string   $str
 * @param string   $char_list <p>The chars to look for; if empty, the length of $str is returned.</p>
 * @param int      $offset    <p>Start of the slice to inspect.</p>
 * @param int|null $length    <p>Length of the slice to inspect, or null for "up to the end".</p>
 * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 */
public static function strcspn(
    string $str,
    string $char_list,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8'
): int {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($char_list === '') {
        return (int) self::strlen($str, $encoding);
    }

    // narrow the input down to the requested slice first
    if ($offset || $length !== null) {
        if ($encoding !== 'UTF-8') {
            $slice = self::substr($str, $offset, $length, $encoding);
        } elseif ($length === null) {
            $slice = \mb_substr($str, $offset);
        } else {
            $slice = \mb_substr($str, $offset, $length);
        }

        if ($slice === false) {
            return 0;
        }

        $str = $slice;
    }

    if ($str === '') {
        return 0;
    }

    // lazily capture everything in front of the first char from the list
    $matches = [];
    if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $matches)) {
        // no char from the list found: the whole string is the segment
        return (int) self::strlen($str, $encoding);
    }

    $segment_length = self::strlen($matches[1], $encoding);

    return $segment_length === false ? 0 : $segment_length;
}
|
9182
|
|
|
|
|
9183
|
|
|
/**
 * Create a UTF-8 string from code points.
 *
 * INFO: opposite to UTF8::codepoints()
 *
 * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
 *
 * Every value is cast to int, turned into a numeric HTML entity and then decoded.
 *
 * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
 *
 * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A UTF-8 encoded string.</p>
 */
public static function string($intOrHex): string
{
    if ($intOrHex === []) {
        return '';
    }

    // accept a single code point as well as a list of them
    $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

    $entities = '';
    foreach ($code_points as $code_point) {
        $entities .= '&#' . (int) $code_point . ';';
    }

    // We cannot use html_entity_decode() here, as it will not return
    // characters for many values < 160.
    // NOTE(review): 'HTML-ENTITIES' is deprecated as a mbstring encoding since PHP 8.2.
    return \mb_convert_encoding($entities, 'UTF-8', 'HTML-ENTITIES');
}
|
9218
|
|
|
|
|
9219
|
|
|
/**
 * Checks if string starts with "BOM" (Byte Order Mark Character) character.
 *
 * The start of the string is compared against every entry of self::$BOM.
 *
 * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>
 *              <strong>true</strong> if the string has BOM at the start,<br>
 *              <strong>false</strong> otherwise
 *              </p>
 */
public static function string_has_bom(string $str): bool
{
    // iterate by value: the BOM table is only read here, so no reference into the static property is needed
    foreach (self::$BOM as $bom_string => $bom_byte_length) {
        // array keys may come back as int, strncmp() needs a string
        if (\strncmp($str, (string) $bom_string, $bom_byte_length) === 0) {
            return true;
        }
    }

    return false;
}
|
9244
|
|
|
|
|
9245
|
|
|
/**
 * Strip HTML and PHP tags from a string + clean invalid UTF-8.
 *
 * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
 *
 * @see http://php.net/manual/en/function.strip-tags.php
 *
 * @param string      $str            <p>
 *                                    The input string.
 *                                    </p>
 * @param string|null $allowable_tags [optional] <p>
 *                                    You can use the optional second parameter to specify tags which should
 *                                    not be stripped.
 *                                    </p>
 *                                    <p>
 *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
 *                                    can not be changed with allowable_tags.
 *                                    </p>
 * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string (via UTF8::clean())
 *                                    before the tags are stripped.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The stripped string.</p>
 */
public static function strip_tags(
    string $str,
    string $allowable_tags = null,
    bool $clean_utf8 = false
): string {
    if ($str === '') {
        return '';
    }

    $input = $clean_utf8 ? self::clean($str) : $str;

    // only pass the second argument on when the caller really provided one
    return $allowable_tags === null
        ? \strip_tags($input)
        : \strip_tags($input, $allowable_tags);
}
|
9289
|
|
|
|
|
9290
|
|
|
/**
 * Strip all whitespace characters. This includes tabs and newline
 * characters, as well as multibyte whitespace such as the thin space
 * and ideographic space.
 *
 * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string without whitespace; an empty string if the replacement fails.</p>
 */
public static function strip_whitespace(string $str): string
{
    // the (string) cast turns a failed replacement (null) into ''
    return $str === ''
        ? ''
        : (string) \preg_replace('/[[:space:]]+/u', '', $str);
}
|
9311
|
|
|
|
|
9312
|
|
|
/**
 * Case-insensitive search for the first occurrence of a substring.
 *
 * INFO: use UTF8::stripos_in_byte() for the byte-length
 *
 * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
 *
 * @see http://php.net/manual/en/function.mb-stripos.php
 *
 * @param string $haystack   <p>The string that is searched.</p>
 * @param string $needle     <p>The string that is searched for.</p>
 * @param int    $offset     [optional] <p>The position in haystack where the search starts.</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The <strong>(int)</strong> position of the first occurrence of needle in
 *                   haystack,<br> or <strong>false</strong> if needle is not found.</p>
 */
public static function stripos(
    string $haystack,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    $is_php8 = \PHP_VERSION_ID >= 80000;

    // empty haystack: only PHP >= 8 reports a hit (position 0), and only for an empty needle
    if ($haystack === '') {
        return ($is_php8 && $needle === '') ? 0 : false;
    }

    // before PHP 8 an empty needle is never found
    if (!$is_php8 && $needle === '') {
        return false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" return a wrong position
        // if invalid characters occur in $haystack before $needle
        $haystack = self::clean($haystack);
        $needle = self::clean($needle);
    }

    //
    // preferred path: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_stripos($haystack, $needle, $offset)
            : \mb_stripos($haystack, $needle, $offset, self::normalize_encoding($encoding, 'UTF-8'));
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    //
    // fallback via intl
    //

    // "grapheme_stripos()" can't handle other encodings or a negative offset
    $intl_usable = $encoding === 'UTF-8'
                   && $offset >= 0
                   && self::$SUPPORT['intl'] === true;
    if ($intl_usable) {
        $intl_position = \grapheme_stripos($haystack, $needle, $offset);
        if ($intl_position !== false) {
            return $intl_position;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \stripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: case-fold both strings, then search case-sensitively
    //

    $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
}
|
9401
|
|
|
|
|
9402
|
|
|
/**
 * Case-insensitive lookup of a substring that returns a part of the haystack:
 * by default everything from the first occurrence of needle (inclusive) to the end.
 *
 * EXAMPLE: <code>
 * $str = 'iñtërnâtiônàlizætiøn';
 * $search = 'NÂT';
 *
 * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
 * UTF8::stristr($str, $search, true)); // 'iñtër'
 * </code>
 *
 * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
 * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
 * @param bool   $before_needle [optional] <p>
 *                              If <b>TRUE</b>, the part of haystack in front of the first
 *                              occurrence of needle is returned (needle itself excluded).
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
 */
public static function stristr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    $is_php8 = \PHP_VERSION_ID >= 80000;

    // empty haystack: only PHP >= 8 treats an empty needle as found
    if ($haystack === '') {
        return ($is_php8 && $needle === '') ? '' : false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" return a wrong position
        // if invalid characters occur in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // empty needle (possibly only after cleaning): whole haystack on PHP >= 8, otherwise not found
    if ($needle === '') {
        return $is_php8 ? $haystack : false;
    }

    //
    // preferred path: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_stristr($haystack, $needle, $before_needle)
            : \mb_stristr($haystack, $needle, $before_needle, self::normalize_encoding($encoding, 'UTF-8'));
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');
    $is_utf8 = $encoding === 'UTF-8';

    if (!$is_utf8 && self::$SUPPORT['mbstring'] === false) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl ("grapheme_stristr()" can't handle other encodings)
    //

    if ($is_utf8 && self::$SUPPORT['intl'] === true) {
        $intl_result = \grapheme_stristr($haystack, $needle, $before_needle);
        if ($intl_result !== false) {
            return $intl_result;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($needle . $haystack)) {
        return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php: capture (non-greedy) everything in front of the first match
    //

    $pattern = '/^(.*?)' . \preg_quote($needle, '/') . '/usi';
    \preg_match($pattern, $haystack, $found);

    if (!isset($found[1])) {
        return false;
    }

    $prefix = $found[1];

    // either the part before the needle, or the haystack without that part
    return $before_needle
        ? $prefix
        : self::substr($haystack, (int) self::strlen($prefix, $encoding), null, $encoding);
}
|
9507
|
|
|
|
|
9508
|
|
|
/**
 * Count the characters of a string (not its bytes).
 *
 * INFO: use UTF8::strwidth() for the char-length
 *
 * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20</code>
 *
 * @see http://php.net/manual/en/function.mb-strlen.php
 *
 * @param string $str        <p>The string whose length is determined.</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>
 *                   The number <strong>(int)</strong> of characters in $str, interpreted in
 *                   $encoding (a multi-byte character counts as 1).
 *                   <br>
 *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we
 *                   process invalid chars.
 *                   </p>
 */
public static function strlen(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($str === '') {
        return 0;
    }

    // "UTF-8" and "CP850" are used as-is, every other name is normalized first
    $needs_normalizing = $encoding !== 'UTF-8' && $encoding !== 'CP850';
    if ($needs_normalizing) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }
    $is_utf8 = $encoding === 'UTF-8';

    if ($clean_utf8) {
        // "mb_strlen" and "\iconv_strlen" return a wrong length
        // if invalid characters are found in $str
        $str = self::clean($str);
    }

    //
    // preferred path: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if (!$is_utf8) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return @\mb_strlen($str, $encoding);
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
        return @\mb_strlen($str);
    }

    //
    // fallback for binary || ascii only: one byte per character
    //

    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
        return \strlen($str);
    }

    if (
        !$is_utf8
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
        $iconv_length = \iconv_strlen($str, $encoding);
        if ($iconv_length !== false) {
            return $iconv_length;
        }
    }

    //
    // fallback via intl ("grapheme_strlen()" can't handle other encodings)
    //

    if ($is_utf8 && self::$SUPPORT['intl'] === true) {
        $intl_length = \grapheme_strlen($str);
        if ($intl_length !== null) {
            return $intl_length;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($str)) {
        return \strlen($str);
    }

    //
    // fallback via vanilla php: let PCRE split the string into UTF-8 characters
    //

    \preg_match_all('/./us', $str, $chars);
    $char_count = \count($chars[0]);

    // no character matched although $str is not empty => report a failure
    return $char_count === 0 ? false : $char_count;
}
|
9638
|
|
|
|
|
9639
|
|
|
/**
 * Get the length of a string in bytes.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <p>The number of bytes in $str.</p>
 */
public static function strlen_in_byte(string $str): int
{
    if ($str === '') {
        return 0;
    }

    // with "mbstring.func_overload" the "mb_" functions are available,
    // so measure via an 8-bit encoding ("CP850") instead of \strlen()
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850')
        : \strlen($str);
}
|
9661
|
|
|
|
|
9662
|
|
|
/**
 * Compare two strings case-insensitively using a "natural order" algorithm.
 *
 * INFO: natural order version of UTF8::strcasecmp()
 *
 * EXAMPLES: <code>
 * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
 * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
 *
 * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
 * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
 * </code>
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
 *             <strong>0</strong> if they are equal
 */
public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
{
    // case-fold both operands first, then delegate to the case-sensitive variant
    $folded_first = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded_second = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded_first, $folded_second);
}
|
9693
|
|
|
|
|
9694
|
|
|
/**
 * Compare two strings using a "natural order" algorithm.
 *
 * INFO: natural order version of UTF8::strcmp()
 *
 * EXAMPLES: <code>
 * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
 * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
 *
 * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
 * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
 * </code>
 *
 * @see http://php.net/manual/en/function.strnatcmp.php
 *
 * @param string $str1 <p>The first string.</p>
 * @param string $str2 <p>The second string.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strnatcmp(string $str1, string $str2): int
{
    // identical strings need no folding at all
    if ($str1 === $str2) {
        return 0;
    }

    $folded_first = (string) self::strtonatfold($str1);
    $folded_second = (string) self::strtonatfold($str2);

    return \strnatcmp($folded_first, $folded_second);
}
|
9730
|
|
|
|
|
9731
|
|
|
/**
 * Case-insensitive string comparison of the first n characters.
 *
 * Both strings are case-folded and then compared via UTF8::strncmp(),
 * using the same $encoding for the folding and for the truncation.
 *
 * EXAMPLE: <code>
 * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
 * </code>
 *
 * @see http://php.net/manual/en/function.strncasecmp.php
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param int    $len      <p>The length of strings to be used in the comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
 *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strncasecmp(
    string $str1,
    string $str2,
    int $len,
    string $encoding = 'UTF-8'
): int {
    return self::strncmp(
        self::strtocasefold($str1, true, false, $encoding, null, false),
        self::strtocasefold($str2, true, false, $encoding, null, false),
        $len,
        // forward the charset, otherwise the first $len characters would
        // always be cut as UTF-8, regardless of the requested encoding
        $encoding
    );
}
|
9764
|
|
|
|
|
9765
|
|
|
/**
 * Compare the first n characters of two strings.
 *
 * EXAMPLE: <code>
 * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
 * </code>
 *
 * @see http://php.net/manual/en/function.strncmp.php
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param int    $len      <p>Number of characters to use in the comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
 *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strncmp(
    string $str1,
    string $str2,
    int $len,
    string $encoding = 'UTF-8'
): int {
    // "UTF-8" and "CP850" are used as-is, every other name is normalized first
    $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$is_known_encoding) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // cut both operands down to their first $len characters
    if ($encoding !== 'UTF-8') {
        $head_first = (string) self::substr($str1, 0, $len, $encoding);
        $head_second = (string) self::substr($str2, 0, $len, $encoding);
    } else {
        $head_first = (string) \mb_substr($str1, 0, $len);
        $head_second = (string) \mb_substr($str2, 0, $len);
    }

    return self::strcmp($head_first, $head_second);
}
|
9806
|
|
|
|
|
9807
|
|
|
/**
 * Search a string for any character out of a given set.
 *
 * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
 *
 * @see http://php.net/manual/en/function.strpbrk.php
 *
 * @param string $haystack  <p>The string in which the characters of char_list are looked for.</p>
 * @param string $char_list <p>The characters to look for. This parameter is case-sensitive.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The string starting from the character found, or false if it is not found.</p>
 */
public static function strpbrk(string $haystack, string $char_list)
{
    $has_input = $haystack !== '' && $char_list !== '';
    if (!$has_input) {
        return false;
    }

    // match the first character of $haystack that belongs to the character class
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $hit)) {
        return false;
    }

    // byte offset of the matched character => the rest of the haystack from there on
    $byte_offset = (int) \strpos($haystack, $hit[0]);

    return \substr($haystack, $byte_offset);
}
|
9834
|
|
|
|
|
9835
|
|
|
/**
 * Find the position of the first occurrence of a substring in a string.
 *
 * INFO: use UTF8::strpos_in_byte() for the byte-length
 *
 * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
 *
 * @see http://php.net/manual/en/function.mb-strpos.php
 *
 * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
 * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
 * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.
 *                               A negative offset is counted from the end of haystack.</p>
 * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
 *                   string.<br> If needle is not found it returns false.
 */
public static function strpos(
    string $haystack,
    $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // before PHP 8 nothing is ever found in an empty haystack
    if ($haystack === '' && \PHP_VERSION_ID < 80000) {
        return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int) $needle === $needle) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    // PHP >= 8: an empty needle is found at position 0 of an empty haystack
    if ($haystack === '') {
        return $needle === '' ? 0 : false;
    }

    if ($needle === '' && \PHP_VERSION_ID < 80000) {
        return false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset);
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
        return @\mb_strpos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback for binary || ascii only
    //
    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
        return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
        if ($return_tmp !== false) {
            return $return_tmp;
        }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
        // ignore invalid negative offset to keep compatibility
        // with php < 5.5.35, < 5.6.21, < 7.0.6
        $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
        if ($return_tmp !== false) {
            return $return_tmp;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
        return @\strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // A negative offset counts from the end of the haystack. Resolve it to an
    // absolute character position first: the tail is cut off below and the
    // result has to stay relative to the start of the original haystack.
    // An offset that reaches before the start is clamped to 0.
    if ($offset < 0) {
        $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
    }

    $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
    if ($haystack_tmp === false) {
        $haystack_tmp = '';
    }
    $haystack = (string) $haystack_tmp;

    // byte position of the needle inside the remaining part of the haystack
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
        return false;
    }

    if ($pos) {
        // convert the byte position into a character position
        return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
    }

    return $offset + 0;
}
|
10008
|
|
|
|
|
10009
|
|
|
/**
 * Find the byte position of the first occurrence of a substring in a string.
 *
 * @param string $haystack <p>
 *                         The string being checked.
 *                         </p>
 * @param string $needle   <p>
 *                         The string to find in haystack.
 *                         </p>
 * @param int    $offset   [optional] <p>
 *                         The search offset. If it is not specified, 0 is used.
 *                         </p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The numeric position of the first occurrence of needle in the
 *                   haystack string. If needle is not found, or if haystack or needle
 *                   is empty, it returns false.</p>
 */
public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    $has_input = $haystack !== '' && $needle !== '';
    if (!$has_input) {
        return false;
    }

    // with "mbstring.func_overload" the "mb_" functions are available,
    // so search via an 8-bit encoding ("CP850") instead of \strpos()
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strpos($haystack, $needle, $offset, 'CP850')
        : \strpos($haystack, $needle, $offset);
}
|
10041
|
|
|
|
|
10042
|
|
|
/**
 * Find the byte position of the first occurrence of a substring in a string, case-insensitive.
 *
 * @param string $haystack <p>
 *                         The string being checked.
 *                         </p>
 * @param string $needle   <p>
 *                         The string to find in haystack.
 *                         </p>
 * @param int    $offset   [optional] <p>
 *                         The search offset. If it is not specified, 0 is used.
 *                         </p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The numeric position of the first occurrence of needle in the
 *                   haystack string. If needle is not found, or if haystack or needle
 *                   is empty, it returns false.</p>
 */
public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    $has_input = $haystack !== '' && $needle !== '';
    if (!$has_input) {
        return false;
    }

    // with "mbstring.func_overload" the "mb_" functions are available,
    // so search via an 8-bit encoding ("CP850") instead of \stripos()
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_stripos($haystack, $needle, $offset, 'CP850')
        : \stripos($haystack, $needle, $offset);
}
|
10074
|
|
|
|
|
10075
|
|
|
/**
 * Return the part of a string before or after the last occurrence of a needle.
 *
 * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
 *
 * INFO: the iconv and the vanilla-php fallbacks below only search for the
 *       first character of the needle.
 *
 * @see http://php.net/manual/en/function.mb-strrchr.php
 *
 * @param string $haystack      <p>The string to search in.</p>
 * @param string $needle        <p>The string to search for.</p>
 * @param bool   $before_needle [optional] <p>
 *                              <b>true</b>: return haystack from its beginning up to the last occurrence.<br>
 *                              <b>false</b>: return haystack from the last occurrence up to its end.
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The requested portion of haystack, or false if needle is not found
 *                      (also false if haystack or needle is empty).</p>
 */
public static function strrchr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($needle === '' || $haystack === '') {
        return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" return a wrong position
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strrchr($haystack, $needle, $before_needle)
            : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    $is_byte_charset = $encoding === 'CP850' || $encoding === 'ASCII';
    if ($is_byte_charset && !$before_needle) {
        return \strrchr($haystack, $needle);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via iconv, otherwise via vanilla php
    //

    // both fallbacks search for the first character of the needle only
    $first_char = self::substr($needle, 0, 1, $encoding);
    if ($first_char === false) {
        return false;
    }

    $position = self::$SUPPORT['iconv'] === true
        ? \iconv_strrpos($haystack, $first_char, $encoding)
        : self::strrpos($haystack, $first_char, 0, $encoding);
    if ($position === false) {
        return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $position, $encoding)
        : self::substr($haystack, $position, null, $encoding);
}
|
10205
|
|
|
|
|
10206
|
|
|
/**
 * Reverse the order of the characters in a string.
 *
 * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
 *
 * The input is passed through UTF8::emoji_encode() before the reversal and
 * the result through UTF8::emoji_decode() afterwards.
 *
 * @param string $str      <p>The input string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with characters in the reverse sequence.</p>
 */
public static function strrev(string $str, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    $str = self::emoji_encode($str, true);
    $result = '';

    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
            $piece = self::substr($str, $index, 1, $encoding);
            if ($piece !== false) {
                $result .= $piece;
            }
        }
    } elseif (self::$SUPPORT['intl'] === true) {
        // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
        for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
            $piece = \grapheme_substr($str, $index, 1);
            if ($piece !== false) {
                $result .= $piece;
            }
        }
    } else {
        for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
            $piece = \mb_substr($str, $index, 1);
            if ($piece !== false) {
                $result .= $piece;
            }
        }
    }

    return self::emoji_decode($result, true);
}
|
10263
|
|
|
|
|
10264
|
|
|
/**
 * Return the part of a string before or after the last occurrence of a needle, case-insensitive.
 *
 * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
 *
 * INFO: the vanilla-php fallback below only searches for the first character
 *       of the needle.
 *
 * @see http://php.net/manual/en/function.mb-strrichr.php
 *
 * @param string $haystack      <p>The string to search in.</p>
 * @param string $needle        <p>The string to search for.</p>
 * @param bool   $before_needle [optional] <p>
 *                              <b>true</b>: return haystack from its beginning up to the last occurrence.<br>
 *                              <b>false</b>: return haystack from the last occurrence up to its end.
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The requested portion of haystack, or<br>false if needle is not found
 *                      (also false if haystack or needle is empty).</p>
 */
public static function strrichr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($needle === '' || $haystack === '') {
        return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" return a wrong position
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strrichr($haystack, $needle, $before_needle)
            : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via vanilla php
    //

    // only the first character of the needle is searched for
    $first_char = self::substr($needle, 0, 1, $encoding);
    if ($first_char === false) {
        return false;
    }

    $position = self::strripos($haystack, $first_char, 0, $encoding);
    if ($position === false) {
        return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $position, $encoding)
        : self::substr($haystack, $position, null, $encoding);
}
|
10344
|
|
|
|
|
10345
|
|
|
/**
 * Find the position of the last occurrence of a substring in a string, case-insensitive.
 *
 * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
 *
 * @param string     $haystack   <p>The string to look in.</p>
 * @param int|string $needle     <p>The string to look for. A non-negative integer is converted via UTF8::chr().</p>
 * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
 * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
 *                   string.<br>If needle is not found, it returns false.</p>
 */
public static function strripos(
    string $haystack,
    $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // before PHP 8 an empty haystack / needle always means "not found",
    // from PHP 8 on an empty needle in an empty haystack is found at position 0
    $before_php8 = \PHP_VERSION_ID < 80000;

    if ($haystack === '') {
        if ($before_php8) {
            return false;
        }

        if ($needle === '') {
            return 0;
        }
    }

    // iconv and mbstring do not support integer $needle
    if ((int) $needle === $needle && $needle >= 0) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    if ($haystack === '') {
        return (!$before_php8 && $needle === '') ? 0 : false;
    }

    if ($before_php8 && $needle === '') {
        return false;
    }

    if ($clean_utf8) {
        // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strripos($haystack, $needle, $offset)
            : \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return \strripos($haystack, $needle, $offset);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strripos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $intl_position = \grapheme_strripos($haystack, $needle, $offset);
        if ($intl_position !== false) {
            return $intl_position;
        }
        // a "false" from intl is not final: the remaining fallbacks are tried as well
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \strripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: case-fold both strings, then search case-sensitive
    //

    $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
    $folded_needle = self::strtocasefold($needle, true, false, $encoding);

    return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
}
|
10476
|
|
|
|
|
10477
|
|
|
/**
 * Find the position of the last occurrence of a string within another, case-insensitive,
 * using the native "strripos()" (or "mb_strripos()" with the 8-bit "CP850" charset).
 *
 * @param string $haystack <p>
 *                         The string from which to get the position of the last occurrence
 *                         of needle.
 *                         </p>
 * @param string $needle   <p>
 *                         The string to find in haystack.
 *                         </p>
 * @param int    $offset   [optional] <p>
 *                         The position in haystack
 *                         to start searching.
 *                         </p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>Return the numeric position of the last occurrence of needle in the
 *                   haystack string, or false if needle is not found
 *                   (also false if haystack or needle is empty).</p>
 */
public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // "mb_" is available if overload is used, so use it ...
    $overload_active = self::$SUPPORT['mbstring_func_overload'] === true;

    return $overload_active
        ? \mb_strripos($haystack, $needle, $offset, 'CP850') // 8-BIT
        : \strripos($haystack, $needle, $offset);
}
|
10511
|
|
|
|
|
10512
|
|
|
/**
 * Find the position of the last occurrence of a substring in a string.
 *
 * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
 *
 * @see http://php.net/manual/en/function.mb-strrpos.php
 *
 * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
 * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
 * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
 *                               into the string. Negative values will stop searching at an arbitrary point prior to
 *                               the end of the string.
 *                               </p>
 * @param string     $encoding   [optional] <p>Set the charset.</p>
 * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
 *                   string.<br>If needle is not found, it returns false.</p>
 */
public static function strrpos(
    string $haystack,
    $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // before PHP 8 an empty haystack / needle always means "not found",
    // from PHP 8 on an empty needle in an empty haystack is found at position 0
    $before_php8 = \PHP_VERSION_ID < 80000;

    if ($haystack === '') {
        if ($before_php8) {
            return false;
        }

        if ($needle === '') {
            return 0;
        }
    }

    // iconv and mbstring do not support integer $needle
    if ((int) $needle === $needle && $needle >= 0) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    if ($haystack === '') {
        return (!$before_php8 && $needle === '') ? 0 : false;
    }

    if ($before_php8 && $needle === '') {
        return false;
    }

    if ($clean_utf8) {
        // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strrpos($haystack, $needle, $offset)
            : \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return \strrpos($haystack, $needle, $offset);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $intl_position = \grapheme_strrpos($haystack, $needle, $offset);
        if ($intl_position !== false) {
            return $intl_position;
        }
        // a "false" from intl is not final: the remaining fallbacks are tried as well
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \strrpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // Apply the character offset by cutting the haystack first. The requested
    // $encoding is passed on, so the cut is made in the same charset that every
    // other path of this method uses (a failed cut results in an empty haystack).
    //
    // NOTE(review): a negative offset is emulated by dropping characters from the
    // end before searching - confirm that this matches "mb_strrpos()" for a needle
    // that starts right at the cut.
    if ($offset > 0) {
        $haystack = (string) self::substr($haystack, $offset, null, $encoding);
    } elseif ($offset < 0) {
        $haystack = (string) self::substr($haystack, 0, $offset, $encoding);
        $offset = 0;
    }

    // byte position of the last hit in the (maybe shortened) haystack
    $byte_position = \strrpos($haystack, $needle);
    if ($byte_position === false) {
        return false;
    }

    /** @var false|string $prefix - needed for PhpStan (stubs error) */
    $prefix = \substr($haystack, 0, $byte_position);
    if ($prefix === false) {
        return false;
    }

    // convert the byte position into a character position (in $encoding)
    // and add the characters that were skipped at the beginning
    return $offset + (int) self::strlen($prefix, $encoding);
}
|
10671
|
|
|
|
|
10672
|
|
|
/**
 * Find the position of the last occurrence of a substring in a string,
 * using the native "strrpos()" (or "mb_strrpos()" with the 8-bit "CP850" charset).
 *
 * @param string $haystack <p>
 *                         The string being checked, for the last occurrence
 *                         of needle.
 *                         </p>
 * @param string $needle   <p>
 *                         The string to find in haystack.
 *                         </p>
 * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
 *                         the string. Negative values will stop searching at an arbitrary point
 *                         prior to the end of the string.
 *                         </p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The numeric position of the last occurrence of needle in the
 *                   haystack string. If needle is not found (or haystack / needle is empty), it returns false.</p>
 */
public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // "mb_" is available if overload is used, so use it ...
    $overload_active = self::$SUPPORT['mbstring_func_overload'] === true;

    return $overload_active
        ? \mb_strrpos($haystack, $needle, $offset, 'CP850') // 8-BIT
        : \strrpos($haystack, $needle, $offset);
}
|
10706
|
|
|
|
|
10707
|
|
|
/**
 * Find the length of the initial segment of a string that consists entirely of
 * characters contained within a given mask.
 *
 * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
 *
 * @param string   $str      <p>The input string.</p>
 * @param string   $mask     <p>The mask of chars</p>
 * @param int      $offset   [optional] <p>Start of the examined part of the string.</p>
 * @param int|null $length   [optional] <p>Length of the examined part of the string.</p>
 * @param string   $encoding [optional] <p>Set the charset.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The length of the matching initial segment; 0 if nothing matches
 *                   or if the (sliced) string or the mask is empty.</p>
 */
public static function strspn(
    string $str,
    string $mask,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8'
) {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // restrict the string to the requested part first
    if ($offset || $length !== null) {
        if ($encoding !== 'UTF-8') {
            $str = (string) self::substr($str, $offset, $length, $encoding);
        } elseif ($length === null) {
            $str = (string) \mb_substr($str, $offset);
        } else {
            $str = (string) \mb_substr($str, $offset, $length);
        }
    }

    if ($mask === '' || $str === '') {
        return 0;
    }

    // match the longest run of mask characters that is anchored at the start
    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
        return 0;
    }

    return (int) self::strlen($matches[0], $encoding);
}
|
10754
|
|
|
|
|
10755
|
|
|
/**
 * Return the part of the haystack from the first occurrence of the needle to the end of the haystack
 * (or the part before that occurrence).
 *
 * EXAMPLE: <code>
 * $str = 'iñtërnâtiônàlizætiøn';
 * $search = 'nât';
 *
 * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
 * UTF8::strstr($str, $search, true)); // 'iñtër'
 * </code>
 *
 * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
 * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
 * @param bool   $before_needle [optional] <p>
 *                              If <b>TRUE</b>, strstr() returns the part of the
 *                              haystack before the first occurrence of the needle (excluding the needle).
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
 */
public static function strstr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // from PHP 8 on an empty needle is "found" at the very beginning,
    // before PHP 8 an empty haystack / needle means "not found"
    if ($haystack === '') {
        return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" return a wrong position
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($needle === '') {
        return \PHP_VERSION_ID >= 80000 ? $haystack : false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strstr($haystack, $needle, $before_needle)
            : \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return \strstr($haystack, $needle, $before_needle);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $intl_result = \grapheme_strstr($haystack, $needle, $before_needle);
        if ($intl_result !== false) {
            return $intl_result;
        }
        // a "false" from intl is not final: the remaining fallbacks are tried as well
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \strstr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // capture (non-greedy) everything that precedes the first occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $hit);

    if (!isset($hit[1])) {
        return false;
    }

    return $before_needle
        ? $hit[1]
        : self::substr($haystack, (int) self::strlen($hit[1]));
}
|
10888
|
|
|
|
|
10889
|
|
|
/**
 * Find the first occurrence of a string within another,
 * using the native "strstr()" (or "mb_strstr()" with the 8-bit "CP850" charset).
 *
 * @param string $haystack      <p>
 *                              The string from which to get the first occurrence
 *                              of needle.
 *                              </p>
 * @param string $needle        <p>
 *                              The string to find in haystack.
 *                              </p>
 * @param bool   $before_needle [optional] <p>
 *                              <b>true</b>: return haystack from its beginning up to the first occurrence.<br>
 *                              <b>false</b>: return haystack from the first occurrence up to its end.
 *                              </p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The portion of haystack,
 *                      or false if needle is not found (also false if haystack or needle is empty).</p>
 */
public static function strstr_in_byte(
    string $haystack,
    string $needle,
    bool $before_needle = false
) {
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // "mb_" is available if overload is used, so use it ...
    $overload_active = self::$SUPPORT['mbstring_func_overload'] === true;

    return $overload_active
        ? \mb_strstr($haystack, $needle, $before_needle, 'CP850') // 8-BIT
        : \strstr($haystack, $needle, $before_needle);
}
|
10930
|
|
|
|
|
10931
|
|
|
/**
 * Unicode transformation for case-less matching.
 *
 * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
 *
 * @see http://unicode.org/reports/tr21/tr21-5.html
 *
 * @param string      $str        <p>The input string.</p>
 * @param bool        $full       [optional] <p>
 *                                <b>true</b>, replace full case folding chars (default)<br>
 *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
 *                                </p>
 * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string      $encoding   [optional] <p>Set the charset.</p>
 * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $lower      [optional] <p>
 *                                <b>true</b>, fold to lowercase (default)<br>
 *                                <b>false</b>, fold to uppercase. PS: uppercase is for some
 *                                languages better ...
 *                                </p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function strtocasefold(
    string $str,
    bool $full = true,
    bool $clean_utf8 = false,
    string $encoding = 'UTF-8',
    string $lang = null,
    bool $lower = true
): string {
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // strip invalid byte sequences before any case mapping is applied
        $str = self::clean($str);
    }

    // replace the special case-folding characters first
    $str = self::fixStrCaseHelper($str, $lower, $full);

    // fast path: plain UTF-8 without language specific rules
    if ($lang === null && $encoding === 'UTF-8') {
        return $lower ? \mb_strtolower($str) : \mb_strtoupper($str);
    }

    // otherwise delegate to the encoding- and language-aware helpers
    // (the string was already cleaned above, so "clean_utf8" is passed as false)
    return $lower
        ? self::strtolower($str, $encoding, false, $lang)
        : self::strtoupper($str, $encoding, false, $lang);
}
|
10987
|
|
|
|
|
10988
|
|
|
/**
 * Make a string lowercase.
 *
 * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
 *
 * @see http://php.net/manual/en/function.mb-strtolower.php
 *
 * @param string      $str                           <p>The string being lowercased.</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>String with all alphabetic characters converted to lowercase.</p>
 */
public static function strtolower(
    $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    // the parameter is untyped, so normalize it to a string first
    $str = (string) $str;
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // strip invalid byte sequences before any case mapping is applied
        $str = self::clean($str);
    }

    // hack for old php version or for the polyfill ...
    if ($try_to_keep_the_string_length) {
        $str = self::fixStrCaseHelper($str, true);
    }

    // fast path: plain UTF-8 without language specific rules
    if ($lang === null && $encoding === 'UTF-8') {
        return \mb_strtolower($str);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    // no language requested: always fallback via symfony polyfill
    if ($lang === null) {
        return \mb_strtolower($str, $encoding);
    }

    // a language was requested, but only "intl" can handle it
    if (self::$SUPPORT['intl'] !== true) {
        /**
         * @psalm-suppress ImpureFunctionCall - this is only a warning
         */
        \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

        return \mb_strtolower($str, $encoding);
    }

    // lazy-load the list of available transliterators
    if (self::$INTL_TRANSLITERATOR_LIST === null) {
        self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
    }

    $transliterator_id = $lang . '-Lower';
    if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
        /**
         * @psalm-suppress ImpureFunctionCall - this is only a warning
         */
        \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

        // unknown language: use the generic transliterator instead
        $transliterator_id = 'Any-Lower';
    }

    return (string) \transliterator_transliterate($transliterator_id, $str);
}
|
11067
|
|
|
|
|
11068
|
|
|
/**
 * Make a string uppercase.
 *
 * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
 *
 * @see http://php.net/manual/en/function.mb-strtoupper.php
 *
 * @param string      $str                           <p>The string being uppercased.</p>
 * @param string      $encoding                      [optional] <p>Set the charset.</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length
 *                                                   (the string is passed through
 *                                                   self::fixStrCaseHelper() first)</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>String with all alphabetic characters converted to uppercase.</p>
 */
public static function strtoupper(
    $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    // init: the parameter is untyped, so normalize it to a string first
    $str = (string) $str;

    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // strip invalid byte sequences before any case mapping is applied
        $str = self::clean($str);
    }

    // hack for old php version or for the polyfill ...
    if ($try_to_keep_the_string_length) {
        $str = self::fixStrCaseHelper($str);
    }

    // fast path: plain UTF-8 without language specific rules
    if ($lang === null && $encoding === 'UTF-8') {
        return \mb_strtoupper($str);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ($lang !== null) {
        if (self::$SUPPORT['intl'] === true) {
            // lazy-load the list of available transliterators
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            $language_code = $lang . '-Upper';
            if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                // intl is available here, but it does not know this language
                /**
                 * @psalm-suppress ImpureFunctionCall - this is only a warning
                 */
                \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                // unknown language: use the generic transliterator instead
                $language_code = 'Any-Upper';
            }

            return (string) \transliterator_transliterate($language_code, $str);
        }

        /**
         * @psalm-suppress ImpureFunctionCall - this is only a warning
         */
        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
}
|
11147
|
|
|
|
|
11148
|
|
|
/**
 * Translate characters or replace sub-strings.
 *
 * EXAMPLE:
 * <code>
 * $array = [
 *     'Hello'   => '○●◎',
 *     '中文空白' => 'earth',
 * ];
 * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
 * </code>
 *
 * @see http://php.net/manual/en/function.strtr.php
 *
 * @param string          $str  <p>The string being translated.</p>
 * @param string|string[] $from <p>The characters / sub-strings to replace, or (if "to" is empty)
 *                              a map of search => replacement.</p>
 * @param string|string[] $to   [optional] <p>The characters / sub-strings to translate to.</p>
 *
 * @psalm-pure
 *
 * @throws \InvalidArgumentException if "from" and "to" cannot be combined into a translation map
 *
 * @return string
 *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
 *                to the corresponding character in "to".</p>
 */
public static function strtr(string $str, $from, $to = ''): string
{
    if ($str === '') {
        return '';
    }

    // nothing would change
    if ($from === $to) {
        return $str;
    }

    if ($to !== '') {
        // split plain strings into their single characters
        if (!\is_array($from)) {
            $from = self::str_split($from);
        }

        if (!\is_array($to)) {
            $to = self::str_split($to);
        }

        $count_from = \count($from);
        $count_to = \count($to);

        // cut the longer list, so that every "from" has exactly one "to"
        if ($count_from > $count_to) {
            $from = \array_slice($from, 0, $count_to);
        } elseif ($count_from < $count_to) {
            $to = \array_slice($to, 0, $count_from);
        }

        // keep "$from" untouched until we know that the combine worked,
        // otherwise the exception message could only print "false"
        $translation = \array_combine($from, $to);
        if ($translation === false) {
            throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
        }

        $from = $translation;
    }

    // "from" is only still a string here if "to" is an empty string
    if (\is_string($from)) {
        return \str_replace($from, $to, $str);
    }

    return \strtr($str, $from);
}
|
11214
|
|
|
|
|
11215
|
|
|
/**
 * Return the width of a string.
 *
 * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
 *
 * @param string $str        <p>The input string.</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <p>The width of the string, 0 for an empty string.</p>
 */
public static function strwidth(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
): int {
    if ($str === '') {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // iconv and mbstring are not tolerant to invalid encoding
        // further, their behaviour is inconsistent with that of PHP's substr
        $str = self::clean($str);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strwidth($str)
            : \mb_strwidth($str, $encoding);
    }

    //
    // fallback via vanilla php
    //

    if ($encoding !== 'UTF-8') {
        $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // remove every character from the "wide" ranges and count how many were removed ...
    $wide_chars = 0;
    $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_chars);

    // ... a wide character counts twice, everything that is left counts once
    return ($wide_chars * 2) + (int) self::strlen($remaining);
}
|
11274
|
|
|
|
|
11275
|
|
|
/**
 * Get part of a string.
 *
 * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
 *
 * @see http://php.net/manual/en/function.mb-substr.php
 *
 * @param string   $str        <p>The string being checked.</p>
 * @param int      $offset     <p>The first position used in str.</p>
 * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
 * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
 *                      <i>length</i> parameters. An empty string is returned for an empty input,
 *                      for a length of 0 and for an offset behind the end of the string.
 *                      <b>FALSE</b> is returned e.g. if the length of the string could not be
 *                      determined.</p>
 */
public static function substr(
    string $str,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // nothing to extract
    if ($str === '' || $length === 0) {
        return '';
    }

    if ($clean_utf8) {
        // iconv and mbstring are not tolerant to invalid encoding
        // further, their behaviour is inconsistent with that of PHP's substr
        $str = self::clean($str);
    }

    // no offset and no length: the whole string was requested
    if ($offset === 0 && $length === null) {
        return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
        // do not pass "null" as length, so that this also works on old php versions
        return $length === null
            ? \mb_substr($str, $offset)
            : \mb_substr($str, $offset, $length);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return $length === null
            ? \substr($str, $offset)
            : \substr($str, $offset, $length);
    }

    // otherwise we need the string-length
    $char_count = 0;
    if ($offset !== 0 || $length === null) {
        $char_count = self::strlen($str, $encoding);
    }

    // e.g.: invalid chars + mbstring not installed
    if ($char_count === false) {
        return false;
    }

    // the offset points exactly at the end and no length was given: empty string
    if ($offset === $char_count && !$length) {
        return '';
    }

    // impossible: the offset is behind the end of the string
    if ($offset !== 0 && $offset > $char_count) {
        return '';
    }

    // from here on we always work with an explicit length
    $length = $length ?? $char_count;

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - this is only a warning
         */
        \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_substr() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $grapheme_result = \grapheme_substr($str, $offset, $length);
        if ($grapheme_result !== false) {
            return $grapheme_result;
        }
    }

    //
    // fallback via iconv
    //

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
        $iconv_result = \iconv_substr($str, $offset, $length);
        if ($iconv_result !== false) {
            return $iconv_result;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($str)) {
        return \substr($str, $offset, $length);
    }

    //
    // fallback via vanilla php
    //

    // split the string into an array of characters,
    // extract the relevant part and join it to a string again
    $chars = self::str_split($str);
    $wanted_chars = \array_slice($chars, $offset, $length);

    return \implode('', $wanted_chars);
}
|
11433
|
|
|
|
|
11434
|
|
|
/**
 * Binary-safe comparison of two strings from an offset, up to a length of characters.
 *
 * EXAMPLE: <code>
 * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
 * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
 * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
 * </code>
 *
 * @param string   $str1               <p>The main string being compared.</p>
 * @param string   $str2               <p>The secondary string being compared.</p>
 * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
 *                                     counting from the end of the string.</p>
 * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
 *                                     of the length of the str compared to the length of main_str less the
 *                                     offset.</p>
 * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
 *                                     insensitive.</p>
 * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
 *             <strong>0</strong> if they are equal
 */
public static function substr_compare(
    string $str1,
    string $str2,
    int $offset = 0,
    int $length = null,
    bool $case_insensitivity = false,
    string $encoding = 'UTF-8'
): int {
    // the strings only need to be cut if an offset or a length was given
    $needs_slicing = $offset !== 0 || $length !== null;

    if ($needs_slicing && $encoding === 'UTF-8') {
        $str1 = (string) (
            $length === null
                ? \mb_substr($str1, $offset)
                : \mb_substr($str1, $offset, $length)
        );

        // compare only as many characters of "str2" as are left in "str1"
        $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
    } elseif ($needs_slicing) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $str1 = (string) self::substr($str1, $offset, $length, $encoding);

        // compare only as many characters of "str2" as are left in "str1"
        $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
    }

    return $case_insensitivity
        ? self::strcasecmp($str1, $str2, $encoding)
        : self::strcmp($str1, $str2);
}
|
11495
|
|
|
|
|
11496
|
|
|
/**
 * Count the number of substring occurrences.
 *
 * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
 *
 * @see http://php.net/manual/en/function.substr-count.php
 *
 * @param string   $haystack   <p>The string to search in.</p>
 * @param string   $needle     <p>The substring to search for.</p>
 * @param int      $offset     [optional] <p>The offset where to start counting.</p>
 * @param int|null $length     [optional] <p>
 *                             The maximum length after the specified offset to search for the
 *                             substring.
 *                             </p>
 * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The number of occurrences of the needle (0 for an empty haystack or a length
 *                   of 0), or false if the needle is an empty string or if the length of the
 *                   haystack could not be determined.</p>
 */
public static function substr_count(
    string $haystack,
    string $needle,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // an empty needle cannot be counted
    if ($needle === '') {
        return false;
    }

    // nothing to search in: the result is 0 on every php version
    if ($haystack === '' || $length === 0) {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // cut the haystack down to the requested window first
    if ($offset || $length > 0) {
        if ($length === null) {
            $length_tmp = self::strlen($haystack, $encoding);
            if ($length_tmp === false) {
                return false;
            }
            $length = $length_tmp;
        }

        if ($encoding === 'UTF-8') {
            $haystack = (string) \mb_substr($haystack, $offset, $length);
        } else {
            $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
        }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - this is only a warning
         */
        \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            return \mb_substr_count($haystack, $needle);
        }

        return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via vanilla php: count the matches of the quoted needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

    return \count($matches);
}
|
11593
|
|
|
|
|
11594
|
|
|
/**
 * Count the number of substring occurrences, byte-wise.
 *
 * @param string   $haystack <p>The string being checked.</p>
 * @param string   $needle   <p>The string being found.</p>
 * @param int      $offset   [optional] <p>The offset where to start counting.</p>
 * @param int|null $length   [optional] <p>
 *                           The maximum length after the specified offset to search for the
 *                           substring.
 *                           </p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>The number of times the needle substring occurs in the haystack string,
 *                   0 if one of both strings is empty.</p>
 */
public static function substr_count_in_byte(
    string $haystack,
    string $needle,
    int $offset = 0,
    int $length = null
) {
    if ($needle === '' || $haystack === '') {
        return 0;
    }

    $is_overloaded = self::$SUPPORT['mbstring_func_overload'] === true;

    // without function overloading we can call the native function directly
    if (!$is_overloaded) {
        // do not pass "null" as length, so that this also works on old php versions
        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }

    // with overloading: cut the haystack down to the requested window first
    if ($offset !== 0 || $length !== null) {
        if ($length === null) {
            $full_length = self::strlen($haystack);
            if ($full_length === false) {
                return false;
            }
            $length = $full_length;
        }

        if (
            $length !== 0
            &&
            $offset !== 0
            &&
            ($length + $offset) <= 0
            &&
            \PHP_VERSION_ID < 71000 // output from "substr_count()" have changed in PHP 7.1
        ) {
            return false;
        }

        /** @var false|string $haystack_slice - needed for PhpStan (stubs error) */
        $haystack_slice = \substr($haystack, $offset, $length);
        $haystack = $haystack_slice === false ? '' : (string) $haystack_slice;
    }

    // "mb_" is available if overload is used, so use it with an 8-bit charset
    return \mb_substr_count($haystack, $needle, 'CP850');
}
|
11675
|
|
|
|
|
11676
|
|
|
/**
 * Returns the number of occurrences of $substring in the given string.
 * By default, the comparison is case-sensitive, but can be made insensitive
 * by setting $case_sensitive to false.
 *
 * @param string $str            <p>The input string.</p>
 * @param string $substring      <p>The substring to search for.</p>
 * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <p>The number of occurrences, 0 if one of both strings is empty.</p>
 */
public static function substr_count_simple(
    string $str,
    string $substring,
    bool $case_sensitive = true,
    string $encoding = 'UTF-8'
): int {
    if ($substring === '' || $str === '') {
        return 0;
    }

    // every encoding except plain UTF-8 needs the explicit charset
    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($case_sensitive) {
            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        // case-insensitive: compare the uppercase case-folded versions of both strings
        $folded_str = self::strtocasefold($str, true, false, $encoding, null, false);
        $folded_substring = self::strtocasefold($substring, true, false, $encoding, null, false);

        return (int) \mb_substr_count($folded_str, $folded_substring, $encoding);
    }

    // UTF-8, case-insensitive: compare the uppercase versions of both strings
    if (!$case_sensitive) {
        $str = \mb_strtoupper($str);
        $substring = \mb_strtoupper($substring);
    }

    return (int) \mb_substr_count($str, $substring);
}
|
11723
|
|
|
|
|
11724
|
|
|
/**
 * Strip the prefix $needle from the start of $haystack, ignoring case.
 *
 * EXAMPLE: <code>
 * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
 * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Return the sub-string.</p>
 */
public static function substr_ileft(string $haystack, string $needle): string
{
    // Nothing to strip from, or nothing to strip: the haystack is the answer.
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (!self::str_istarts_with($haystack, $needle)) {
        return $haystack;
    }

    $prefix_length = (int) self::strlen($needle);

    return (string) \mb_substr($haystack, $prefix_length);
}
|
11756
|
|
|
|
|
11757
|
|
|
/**
 * Return a part of a string, addressed in bytes instead of characters.
 *
 * @param string   $str    <p>The string being checked.</p>
 * @param int      $offset <p>The first byte position used in str.</p>
 * @param int|null $length [optional] <p>The maximum length (in bytes) of the returned string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
 *                      <i>length</i> parameters. The value is whatever the underlying
 *                      "substr()" / "mb_substr()" call returns, so <b>FALSE</b> is possible
 *                      on PHP versions where "substr()" reports an out-of-range offset that way.</p>
 */
public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
{
    // nothing can be extracted
    if ($str === '' || $length === 0) {
        return '';
    }

    // no offset and no limit: the whole string is requested
    if ($length === null && !$offset) {
        return $str;
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
        // "substr()" gets a very large length when none was given
        return \substr($str, $offset, $length ?? 2147483647);
    }

    // "mb_" is available if overload is used; the 8-bit charset "CP850"
    // makes it operate byte-wise
    return \mb_substr($str, $offset, $length, 'CP850');
}
|
11790
|
|
|
|
|
11791
|
|
|
/**
 * Strip the suffix $needle from the end of $haystack, ignoring case.
 *
 * EXAMPLE: <code>
 * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
 * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Return the sub-string.</p>
 */
public static function substr_iright(string $haystack, string $needle): string
{
    // Nothing to strip from, or nothing to strip: the haystack is the answer.
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (!self::str_iends_with($haystack, $needle)) {
        return $haystack;
    }

    // keep everything except the last "length of needle" characters
    $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

    return (string) \mb_substr($haystack, 0, $keep_length);
}
|
11823
|
|
|
|
|
11824
|
|
|
/**
 * Strip the prefix $needle from the start of $haystack (case-sensitive).
 *
 * EXAMPLE: <code>
 * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
 * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Return the sub-string.</p>
 */
public static function substr_left(string $haystack, string $needle): string
{
    // Nothing to strip from, or nothing to strip: the haystack is the answer.
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (!self::str_starts_with($haystack, $needle)) {
        return $haystack;
    }

    $prefix_length = (int) self::strlen($needle);

    return (string) \mb_substr($haystack, $prefix_length);
}
|
11856
|
|
|
|
|
11857
|
|
|
/**
 * Replace text within a portion of a string.
 *
 * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
 *
 * source: https://gist.github.com/stemar/8287074
 *
 * When $str is an array, every element is processed on its own (with the
 * given $encoding) and an array is returned:
 * - a scalar $replacement / $offset / $length is applied to every element,
 * - array arguments are matched to the strings by position and cut to the number of strings,
 * - missing replacements become an empty string and missing offsets become 0,
 * - a NULL $length means "length 0", i.e. the replacement is inserted (see the example).
 *
 * When $str is a string, a NULL $length replaces everything up to the end of the string.
 *
 * @param string|string[] $str         <p>The input string or an array of strings.</p>
 * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
 * @param int|int[]       $offset      <p>
 *                                     If start is positive, the replacing will begin at the start'th offset
 *                                     into string.
 *                                     <br><br>
 *                                     If start is negative, the replacing will begin at the start'th character
 *                                     from the end of string.
 *                                     </p>
 * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
 *                                     portion of string which is to be replaced. If it is negative, it
 *                                     represents the number of characters from the end of string at which to
 *                                     stop replacing. If length is zero then this function will have the
 *                                     effect of inserting replacement into string at the given start offset.</p>
 * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
 *
 * @return string|string[]
 *                         <p>The result string is returned. If string is an array then array is returned.</p>
 *
 * @template TSubstrReplace
 * @phpstan-param TSubstrReplace $str
 * @phpstan-return TSubstrReplace
 */
public static function substr_replace(
    $str,
    $replacement,
    $offset,
    $length = null,
    string $encoding = 'UTF-8'
) {
    if (\is_array($str)) {
        $num = \count($str);

        // the replacement
        if (\is_array($replacement)) {
            $replacement = \array_slice($replacement, 0, $num);
        } else {
            $replacement = \array_pad([$replacement], $num, $replacement);
        }

        // the offset
        if (\is_array($offset)) {
            $offset = \array_slice($offset, 0, $num);
            foreach ($offset as &$value_tmp) {
                $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
            }
            unset($value_tmp);

            // Without padding, "array_map()" would hand NULL to the recursive
            // call for the missing entries; use offset 0 for those instead.
            $offset = \array_pad($offset, $num, 0);
        } else {
            $offset = \array_pad([$offset], $num, $offset);
        }

        // the length
        if ($length === null) {
            $length = \array_fill(0, $num, 0);
        } elseif (\is_array($length)) {
            $length = \array_slice($length, 0, $num);
            foreach ($length as &$value_tmp_V2) {
                $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
            }
            unset($value_tmp_V2);
        } else {
            $length = \array_pad([$length], $num, $length);
        }

        // Recursive call per element. The closure forwards $encoding, which a
        // plain callable passed to "array_map()" would drop (every element
        // would then be handled as UTF-8).
        return \array_map(
            static function ($str_item, $replacement_item, $offset_item, $length_item) use ($encoding) {
                return self::substr_replace($str_item, $replacement_item, $offset_item, $length_item, $encoding);
            },
            $str,
            $replacement,
            $offset,
            $length
        );
    }

    // a replacement array for a single string: only its first entry is used
    if (\is_array($replacement)) {
        if ($replacement !== []) {
            $replacement = $replacement[0];
        } else {
            $replacement = '';
        }
    }

    // init
    $str = (string) $str;
    $replacement = (string) $replacement;

    if (\is_array($length)) {
        throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
    }

    if (\is_array($offset)) {
        throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
    }

    if ($str === '') {
        return $replacement;
    }

    if (self::$SUPPORT['mbstring'] === true) {
        $string_length = (int) self::strlen($str, $encoding);

        // clamp the offset into [0, string length]
        if ($offset < 0) {
            $offset = (int) \max(0, $string_length + $offset);
        } elseif ($offset > $string_length) {
            $offset = $string_length;
        }

        // a negative length counts from the end of the string
        if ($length !== null && $length < 0) {
            $length = (int) \max(0, $string_length - $offset + $length);
        } elseif ($length === null || $length > $string_length) {
            $length = $string_length;
        }

        // never replace past the end of the string
        if (($offset + $length) > $string_length) {
            $length = $string_length - $offset;
        }

        return ((string) \mb_substr($str, 0, $offset, $encoding)) .
               $replacement .
               ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($str)) {
        return ($length === null) ?
            \substr_replace($str, $replacement, $offset) :
            \substr_replace($str, $replacement, $offset, $length);
    }

    //
    // fallback via vanilla php
    //

    \preg_match_all('/./us', $str, $str_matches);
    \preg_match_all('/./us', $replacement, $replacement_matches);

    if ($length === null) {
        $length_tmp = self::strlen($str, $encoding);
        if ($length_tmp === false) {
            // e.g.: non mbstring support + invalid chars
            return '';
        }
        $length = $length_tmp;
    }

    \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

    return \implode('', $str_matches[0]);
}
|
12015
|
|
|
|
|
12016
|
|
|
/**
 * Strip the suffix $needle from the end of $haystack (case-sensitive).
 *
 * EXAMPLE: <code>
 * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
 * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Return the sub-string.</p>
 */
public static function substr_right(
    string $haystack,
    string $needle,
    string $encoding = 'UTF-8'
): string {
    // Nothing to strip from, or nothing to strip: the haystack is the answer.
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    // the suffix test itself is done on bytes
    $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
    if (!$ends_with_needle) {
        return $haystack;
    }

    if ($encoding === 'UTF-8') {
        $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }

    $keep_length = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

    return (string) self::substr($haystack, 0, $keep_length, $encoding);
}
|
12065
|
|
|
|
|
12066
|
|
|
/**
 * Returns a case swapped version of the string.
 *
 * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
 *
 * For UTF-8 the swap is done with a byte-wise XOR of the lower-cased, the
 * upper-cased and the original string. That trick only lines up when the
 * three strings have the same byte length, so when a case conversion changes
 * the byte length (e.g. "ı" -> "I") each character is swapped on its own.
 *
 * NOTE: for encodings other than "UTF-8" only the XOR variant is used, so
 * the result is cut to the shortest of the three strings there.
 *
 * @param string $str        <p>The input string.</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Each character's case swapped.</p>
 */
public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
{
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
        return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
    }

    $lower = \mb_strtolower($str);
    $upper = \mb_strtoupper($str);
    $byte_length = \strlen($str);

    // fast path: all three strings line up byte by byte
    if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
        return (string) ($lower ^ $upper ^ $str);
    }

    // A case conversion changed the byte length: XOR would mix bytes of
    // different characters and cut the result, so swap char by char.
    $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
    if ($chars === false) {
        // not splittable as UTF-8 (invalid input): keep the XOR result
        return (string) ($lower ^ $upper ^ $str);
    }

    $swapped = '';
    foreach ($chars as $char) {
        $char_lower = \mb_strtolower($char);
        // already lower-case (or caseless) -> upper-case it, otherwise lower-case it
        $swapped .= $char === $char_lower ? \mb_strtoupper($char) : $char_lower;
    }

    return $swapped;
}
|
12098
|
|
|
|
|
12099
|
|
|
/**
 * Checks whether symfony-polyfills are used.
 *
 * A polyfill is assumed when the "mbstring" (or "iconv") extension is not
 * loaded but the function "mb_strlen" (or "iconv") exists nevertheless.
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
 *
 * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
 */
public static function symfony_polyfill_used(): bool
{
    $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
    $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

    return $mbstring_is_polyfilled || $iconv_is_polyfilled;
}
|
12126
|
|
|
|
|
12127
|
|
|
/**
 * Replace every tab character in the string with $tab_length spaces.
 *
 * The replacement is always built with "str_repeat()", so the number of
 * spaces matches $tab_length for every value (a $tab_length of 0 removes
 * the tabs).
 *
 * @param string $str        <p>The input string.</p>
 * @param int    $tab_length [optional] <p>Number of spaces used for one tab. Default: 4</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function tabs_to_spaces(string $str, int $tab_length = 4): string
{
    $spaces = \str_repeat(' ', $tab_length);

    return \str_replace("\t", $spaces, $str);
}
|
12147
|
|
|
|
|
12148
|
|
|
/**
 * Converts the first character of each word in the string to uppercase
 * and all other chars to lowercase.
 *
 * Without a language and without the "keep string length" option the work
 * is done by "mb_convert_case()"; otherwise it is handed to "str_titleize()".
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with all characters of $str being title-cased.</p>
 */
public static function titlecase(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    $needs_special_handling = $lang !== null || $try_to_keep_the_string_length;
    if ($needs_special_handling) {
        return self::str_titleize(
            $str,
            null,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length,
            false
        );
    }

    if ($encoding === 'UTF-8') {
        return \mb_convert_case($str, \MB_CASE_TITLE);
    }

    $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

    return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
}
|
12202
|
|
|
|
|
12203
|
|
|
/**
 * Convert a string into ASCII.
 *
 * Thin wrapper: the conversion itself is done by "ASCII::to_transliterate()".
 *
 * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
 *
 * @param string $str     <p>The input string.</p>
 * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
 * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
 *                        performance</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function to_ascii(
    string $str,
    string $unknown = '?',
    bool $strict = false
): string {
    $ascii = ASCII::to_transliterate($str, $unknown, $strict);

    return $ascii;
}
|
12224
|
|
|
|
|
12225
|
|
|
/**
 * Interpret a scalar value as a boolean.
 *
 * The value is cast to string and then evaluated in this order:
 * - an empty string is false,
 * - "true", "1", "on", "yes" are true and "false", "0", "off", "no" are
 *   false (matched as given, then lower-cased),
 * - a numeric string is true when its float value is greater than 0,
 * - anything else is true unless it is empty after "trim()".
 *
 * @param bool|float|int|string $str
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function to_boolean($str): bool
{
    $value = (string) $str;

    if ($value === '') {
        return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $known_values = [
        'true'  => true,
        '1'     => true,
        'on'    => true,
        'yes'   => true,
        'false' => false,
        '0'     => false,
        'off'   => false,
        'no'    => false,
    ];

    // exact hit first, the lower-cased spelling second
    $mapped = $known_values[$value] ?? $known_values[\strtolower($value)] ?? null;
    if ($mapped !== null) {
        return $mapped;
    }

    if (\is_numeric($value)) {
        return ((float) $value) > 0;
    }

    return (bool) \trim($value);
}
|
12268
|
|
|
|
|
12269
|
|
|
/**
 * Convert given string to safe filename (and keep string case).
 *
 * Thin wrapper: the conversion itself is done by "ASCII::to_filename()".
 *
 * @param string $str
 * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
 *                                  simply replaced with hyphen.
 * @param string $fallback_char
 *
 * @psalm-pure
 *
 * @return string
 */
public static function to_filename(
    string $str,
    bool $use_transliterate = false,
    string $fallback_char = '-'
): string {
    $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

    return $filename;
}
|
12292
|
|
|
|
|
12293
|
|
|
/**
 * Convert a string into "ISO-8859"-encoding (Latin-1).
 *
 * Arrays are converted element by element (keys are kept, nested arrays
 * are handled recursively); strings are passed to "utf8_decode()".
 *
 * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859(' -ABC-中文空白- ')); // ' -ABC-????- '</code>
 *
 * @param string|string[] $str
 *
 * @psalm-pure
 *
 * @return string|string[]
 *
 * @template TToIso8859
 * @phpstan-param TToIso8859 $str
 * @phpstan-return TToIso8859
 */
public static function to_iso8859($str)
{
    if (\is_array($str)) {
        return \array_map(
            static function ($item) {
                return self::to_iso8859($item);
            },
            $str
        );
    }

    $string = (string) $str;

    return $string === '' ? '' : self::utf8_decode($string);
}
|
12325
|
|
|
|
|
12326
|
|
|
/**
 * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
 *
 * <ul>
 * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
 * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
 * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
 *     case.</li>
 * </ul>
 *
 * Every string (or every element of an array, keys are kept) is converted
 * by "to_utf8_string()".
 *
 * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
 *
 * @param string|string[] $str                        <p>Any string or array of strings.</p>
 * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
 *
 * @psalm-pure
 *
 * @return string|string[]
 *                         <p>The UTF-8 encoded string</p>
 *
 * @template TToUtf8
 * @phpstan-param TToUtf8 $str
 * @phpstan-return TToUtf8
 */
public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
{
    if (!\is_array($str)) {
        /** @phpstan-var TToUtf8 $converted */
        $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

        return $converted;
    }

    /** @phpstan-var TToUtf8 $converted */
    $converted = \array_map(
        static function ($item) use ($decode_html_entity_to_utf8) {
            return self::to_utf8_string($item, $decode_html_entity_to_utf8);
        },
        $str
    );

    return $converted;
}
|
12366
|
|
|
|
|
12367
|
|
|
/**
 * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
 *
 * <ul>
 * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
 * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
 * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
 *     case.</li>
 * </ul>
 *
 * The string is scanned byte by byte: a lead byte followed by the expected
 * number of continuation bytes (0x80-0xBF) is copied unchanged, every other
 * byte >= 0x80 is handed to "to_utf8_convert_helper()". Afterwards "\uXXXX"
 * escape sequences (including surrogate pairs) are decoded.
 *
 * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
 *
 * @param string $str                        <p>Any string.</p>
 * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The UTF-8 encoded string</p>
 */
public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
{
    if ($str === '') {
        return $str;
    }

    $byte_count = \strlen($str);
    $result = '';
    $pos = 0;

    while ($pos < $byte_count) {
        $lead = $str[$pos];

        if ($lead < "\xC0") {
            // 0x80-0xBF outside of a sequence needs conversion, everything
            // below 0x80 is copied as it is
            $result .= ($lead & "\xC0") === "\x80"
                ? self::to_utf8_convert_helper($lead)
                : $lead;
            ++$pos;

            continue;
        }

        // how many bytes the sequence should have, judged by its lead byte
        if ($lead <= "\xDF") {
            $sequence_length = 2;
        } elseif ($lead <= "\xEF") {
            $sequence_length = 3;
        } elseif ($lead <= "\xF7") {
            $sequence_length = 4;
        } else {
            // 0xF8-0xFF: doesn't look like UTF8, but should be converted
            $sequence_length = 0;
        }

        // every following byte of the sequence must be a continuation byte;
        // a byte beyond the end of the string counts as "\x00"
        $is_utf8_sequence = $sequence_length > 0;
        for ($k = 1; $is_utf8_sequence && $k < $sequence_length; ++$k) {
            $trail = $str[$pos + $k] ?? "\x00";
            $is_utf8_sequence = $trail >= "\x80" && $trail <= "\xBF";
        }

        if ($is_utf8_sequence) {
            // yeah, almost sure it's UTF8 already
            $result .= \substr($str, $pos, $sequence_length);
            $pos += $sequence_length;
        } else {
            // not valid UTF8 - convert the lead byte only
            $result .= self::to_utf8_convert_helper($lead);
            ++$pos;
        }
    }

    // decode unicode escape sequences + unicode surrogate pairs
    $result = \preg_replace_callback(
        '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
        /**
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $match): string {
            if (isset($match[3])) {
                // single "\uXXXX" escape
                $code_point = (int) \hexdec($match[3]);
            } else {
                // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                $code_point = ((int) \hexdec($match[1]) << 10)
                              + (int) \hexdec($match[2])
                              + 0x10000
                              - (0xD800 << 10)
                              - 0xDC00;
            }

            // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
            //
            // php_utf32_utf8(unsigned char *buf, unsigned k)

            if ($code_point < 0x80) {
                return (string) self::chr($code_point);
            }

            if ($code_point < 0xA0) {
                /** @noinspection UnnecessaryCastingInspection */
                return (string) self::chr(0xC0 | $code_point >> 6) . (string) self::chr(0x80 | $code_point & 0x3F);
            }

            return self::decimal_to_chr($code_point);
        },
        $result
    );

    // "preg_replace_callback()" signals an error with NULL
    if ($result === null) {
        return '';
    }

    // decode UTF-8 codepoints
    if ($decode_html_entity_to_utf8) {
        $result = self::html_entity_decode($result);
    }

    return $result;
}
|
12498
|
|
|
|
|
12499
|
|
|
/**
 * Converts a numeric string into an integer.
 *
 * EXAMPLE: <code>UTF8::to_int('42'); // 42</code>
 *
 * @param string $str <p>The string to convert.</p>
 *
 * @psalm-pure
 *
 * @return int|null
 *                  <p>The integer cast of the string, or null if "is_numeric()" rejects it.</p>
 */
public static function to_int(string $str)
{
    return \is_numeric($str) ? (int) $str : null;
}
|
12517
|
|
|
|
|
12518
|
|
|
/**
 * Converts the input into a string when that is possible without guessing.
 *
 * Strings, integers and floats are cast directly, objects are cast only if
 * they provide a "__toString()" method. Everything else (null, bool, array,
 * resource, other objects) yields null.
 *
 * @param float|int|object|string|null $input <p>The value to convert.</p>
 *
 * @psalm-pure
 *
 * @return string|null
 *                     <p>The string representation, or null if the input is not convertible.</p>
 */
public static function to_string($input)
{
    // strings and numbers can be cast as they are
    if (\is_string($input) || \is_int($input) || \is_float($input)) {
        return (string) $input;
    }

    // objects are only convertible if they know how to stringify themselves
    if (\is_object($input) && \method_exists($input, '__toString')) {
        return (string) $input;
    }

    return null;
}
|
12557
|
|
|
|
|
12558
|
|
|
/**
 * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
 *
 * INFO: This is slower than "trim()".
 *
 * The native function could only be used for strings / chars that are pure
 * 7-bit ASCII, but checking for that costs more time than it would save here.
 *
 * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
 *
 * @param string      $str   <p>The string to be trimmed.</p>
 * @param string|null $chars [optional] <p>Characters to be stripped; they are regex-quoted and used
 *                           as a character class. Whitespace ("\s") is stripped if null.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The trimmed string.</p>
 */
public static function trim(string $str = '', string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    $use_mbstring = self::$SUPPORT['mbstring'] === true;

    if ($chars === null) {
        $pattern = '^[\\s]+|[\\s]+$';
    } else {
        // The fallback additionally quotes "/", the mbstring variant does not.
        // NOTE(review): presumably "regex_replace()" delimits the pattern with "/" - confirm.
        $chars = $use_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');

        // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
        $pattern = "^[{$chars}]+|[{$chars}]+\$";
    }

    if ($use_mbstring) {
        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    return self::regex_replace($str, $pattern, '');
}
|
12603
|
|
|
|
|
12604
|
|
|
/**
 * Converts the first character of a string to uppercase, the rest stays untouched.
 *
 * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The string with an uppercased first character.</p>
 */
public static function ucfirst(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    // the plain "mb_strtoupper()" is enough if no language / length handling was requested
    $plain_uppercase = $lang === null && !$try_to_keep_the_string_length;

    if ($encoding === 'UTF-8') {
        $first_char = (string) \mb_substr($str, 0, 1);
        $rest = (string) \mb_substr($str, 1);

        $first_char = $plain_uppercase
            ? \mb_strtoupper($first_char)
            : self::strtoupper($first_char, $encoding, false, $lang, $try_to_keep_the_string_length);

        return $first_char . $rest;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');
    $rest = (string) self::substr($str, 1, null, $encoding);

    if ($plain_uppercase) {
        return \mb_strtoupper((string) \mb_substr($str, 0, 1, $encoding), $encoding) . $rest;
    }

    $first_char = self::strtoupper(
        (string) self::substr($str, 0, 1, $encoding),
        $encoding,
        false,
        $lang,
        $try_to_keep_the_string_length
    );

    return $first_char . $rest;
}
|
12680
|
|
|
|
|
12681
|
|
|
/**
 * Uppercase the first character of every word in the string.
 *
 * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $exceptions [optional] <p>Words that are copied to the result unchanged
 *                             (compared strictly against each word).</p>
 * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
 *                             word.</p>
 * @param string   $encoding   [optional] <p>Set the charset.</p>
 * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function ucwords(
    string $str,
    array $exceptions = [],
    string $char_list = '',
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
): string {
    // compare against '' explicitly: the string "0" is falsy in PHP but is valid input
    if ($str === '') {
        return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    // the native function is only an option if neither extra word chars nor exceptions were given
    // (again a strict comparison, so that e.g. a $char_list of "0" is not ignored)
    $use_php_default_functions = ($char_list . \implode('', $exceptions)) === '';

    if ($use_php_default_functions && ASCII::is_ascii($str)) {
        return \ucwords($str);
    }

    $words = self::str_to_words($str, $char_list);
    $use_exceptions = $exceptions !== [];

    $words_str = '';
    foreach ($words as $word) {
        // skip empty chunks only, a chunk like "0" must survive
        if ((string) $word === '') {
            continue;
        }

        if ($use_exceptions && \in_array($word, $exceptions, true)) {
            $words_str .= $word;
        } else {
            $words_str .= self::ucfirst($word, $encoding);
        }
    }

    return $words_str;
}
|
12749
|
|
|
|
|
12750
|
|
|
/**
 * Decodes url-encoded input and HTML entities, optionally repeating until nothing changes.
 *
 * Each decoding round converts the string via "to_utf8()", decodes HTML entities
 * (ENT_QUOTES | ENT_HTML5) and then runs the native "urldecode()". The final result
 * is passed through "fix_simple_utf8()".
 *
 * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
 *
 * e.g:
 * 'test+test'                     => 'test test'
 * 'D&#252;sseldorf'               => 'Düsseldorf'
 * 'D%FCsseldorf'                  => 'Düsseldorf'
 * 'D&#xFC;sseldorf'               => 'Düsseldorf'
 * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
 * 'D%C3%BCsseldorf'               => 'Düsseldorf'
 * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
 * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
 *
 * @param string $str          <p>The input string.</p>
 * @param bool   $multi_decode <p>Decode as often as possible.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function urldecode(string $str, bool $multi_decode = true): string
{
    if ($str === '') {
        return '';
    }

    $decoded = self::urldecode_unicode_helper($str);

    // one round is always done, further rounds only in multi-decode mode
    // and only as long as a round still changes the string
    do {
        $previous = $decoded;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $decoded = \urldecode(
            self::html_entity_decode(
                self::to_utf8($decoded),
                \ENT_QUOTES | \ENT_HTML5
            )
        );
    } while ($multi_decode && $previous !== $decoded);

    return self::fix_simple_utf8($decoded);
}
|
12809
|
|
|
|
|
12810
|
|
|
/**
 * Decodes a UTF-8 string to ISO-8859-1.
 *
 * The string is rewritten in place, byte by byte: 2-byte sequences become the
 * matching single byte (or "?" if the code point is >= 256), 3- and 4-byte
 * sequences become a single "?", every other byte is copied as it is.
 *
 * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
 *
 * @param string $str             <p>The input string.</p>
 * @param bool   $keep_utf8_chars <p>If true, the original string is returned whenever the decoded
 *                                result is not shorter (in characters) than the input.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
{
    if ($str === '') {
        return '';
    }

    // save for later comparison
    $str_backup = $str;
    $len = \strlen($str);

    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }

    $no_char_found = '?';

    // $i is the read position, $j the write position ($j <= $i at any time)
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
        switch ($str[$i] & "\xF0") {
            case "\xC0":
            case "\xD0":
                // lead byte of a 2-byte sequence
                if ($i + 1 >= $len) {
                    // truncated sequence at the very end of the input:
                    // there is no second byte to read, so do not touch "$str[$len]"
                    $str[$j] = $no_char_found;

                    break;
                }

                $high_bits = self::$ORD[$str[$i] & "\x1F"];
                ++$i;
                $c = ($high_bits << 6) | self::$ORD[$str[$i] & "\x3F"];
                $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                break;

            case "\xF0":
                // 4-byte sequence: one byte more to skip than for 3 bytes
                ++$i;

                // no break

            case "\xE0":
                // 3-byte sequence: never representable in ISO-8859-1
                $str[$j] = $no_char_found;
                $i += 2;

                break;

            default:
                $str[$j] = $str[$i];
        }
    }

    /** @var false|string $return - needed for PhpStan (stubs error) */
    $return = \substr($str, 0, $j);
    if ($return === false) {
        $return = '';
    }

    if (
        $keep_utf8_chars
        &&
        (int) self::strlen($return) >= (int) self::strlen($str_backup)
    ) {
        return $str_backup;
    }

    return $return;
}
|
12882
|
|
|
|
|
12883
|
|
|
/**
 * Encodes an ISO-8859-1 string to UTF-8.
 *
 * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The UTF-8 encoded string, or an empty string if the conversion failed.</p>
 */
public static function utf8_encode(string $str): string
{
    if ($str === '') {
        return '';
    }

    // "\utf8_encode()" is deprecated as of PHP 8.2, this call is its documented
    // replacement and maps every byte to the code point of the same value
    $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

    // the native function / a polyfill may return false on failure
    return \is_string($encoded) ? $encoded : '';
}
|
12909
|
|
|
|
|
12910
|
|
|
/**
 * Returns the table of known UTF-8 whitespace characters.
 *
 * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
 *
 * @psalm-pure
 *
 * @return string[]
 *                  <p>The content of the internal whitespace table: the type of whitespace as key
 *                  (as defined in the above URL) and the whitespace character as value.</p>
 */
public static function whitespace_table(): array
{
    $table = self::$WHITESPACE_TABLE;

    return $table;
}
|
12925
|
|
|
|
|
12926
|
|
|
/**
 * Limit the number of words in a string.
 *
 * Words are runs of non-whitespace characters. If the string has more than
 * $limit words, it is cut after the last allowed word and $str_add_on is appended.
 *
 * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
 *
 * @param string $str        <p>The input string.</p>
 * @param int    $limit      <p>The limit of words as integer.</p>
 * @param string $str_add_on <p>Replacement for the stripped part of the string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>An empty string for empty input or a limit below 1, the unchanged input if
 *                nothing had to be cut, otherwise the shortened string plus $str_add_on.</p>
 */
public static function words_limit(
    string $str,
    int $limit = 100,
    string $str_add_on = '…'
): string {
    if ($str === '' || $limit < 1) {
        return '';
    }

    // leading whitespace + up to $limit words, each including its trailing whitespace
    \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches);

    if (
        !isset($matches[0])
        ||
        \mb_strlen($str) === (int) \mb_strlen($matches[0])
    ) {
        // nothing matched or nothing to cut off
        return $str;
    }

    // The words were separated with "\s" in UTF-8 mode, so the separator left at the
    // end can be a multi-byte whitespace character that the byte-wise "rtrim()" does
    // not know. Strip it with the same character class ("\x00" is kept in the class
    // because "rtrim()" removed it by default as well).
    $limited = \preg_replace('/[\\s\\x00]++\\z/u', '', $matches[0]);
    if ($limited === null) {
        $limited = \rtrim($matches[0]);
    }

    return $limited . $str_add_on;
}
|
12960
|
|
|
|
|
12961
|
|
|
/**
 * Wraps a string to a given number of characters.
 *
 * The wrapping positions are not computed on the string itself: a mask with exactly
 * one byte per character is built (" " for a space, "?" for any other character and
 * "#" for an existing break), the native "wordwrap()" is applied to that mask, and
 * the "#" positions of the wrapped mask are mapped back onto the real characters.
 *
 * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
 *
 * @see http://php.net/manual/en/function.wordwrap.php
 *
 * @param string $str   <p>The input string.</p>
 * @param int    $width [optional] <p>The column width.</p>
 * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
 * @param bool   $cut   [optional] <p>
 *                      If the cut is set to true, the string is
 *                      always wrapped at or before the specified width. So if you have
 *                      a word that is larger than the given width, it is broken apart.
 *                      </p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The given string wrapped at the specified column;
 *                an empty string if $str or $break is empty.</p>
 */
public static function wordwrap(
    string $str,
    int $width = 75,
    string $break = "\n",
    bool $cut = false
): string {
    if ($str === '' || $break === '') {
        return '';
    }

    // parallel views of the input: the real characters and their one-byte mask
    /** @var string[] $chars */
    $chars = [];
    $mask = '';
    foreach (\explode($break, $str) as $segment_index => $segment) {
        if ($segment_index > 0) {
            // an already existing break counts as one unit
            $chars[] = $break;
            $mask .= '#';
        }

        foreach (self::str_split($segment) as $char) {
            $chars[] = $char;
            $mask .= $char === ' ' ? ' ' : '?';
        }
    }

    $mask = \wordwrap($mask, $width, '#', $cut);
    $mask_length = \mb_strlen($mask);

    $wrapped = '';
    $char_index = 0;
    $mask_index = -1;
    $break_pos = -1;

    /** @noinspection PhpAssignmentInConditionInspection - is ok here */
    while (($break_pos = \mb_strpos($mask, '#', $break_pos + 1)) !== false) {
        // copy everything in front of the current break position
        for (++$mask_index; $mask_index < $break_pos; ++$mask_index) {
            if (isset($chars[$char_index])) {
                $wrapped .= $chars[$char_index];
                unset($chars[$char_index]);
            }
            ++$char_index;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($mask_index > $mask_length) {
                break 2;
            }
        }

        // the break replaces an existing break or a space, so drop that character
        if (
            $break === $chars[$char_index]
            ||
            $chars[$char_index] === ' '
        ) {
            unset($chars[$char_index]);
            ++$char_index;
        }

        $wrapped .= $break;

        // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
        if ($break_pos > $mask_length) {
            break;
        }
    }

    // whatever was not consumed above belongs to the last line
    return $wrapped . \implode('', $chars);
}
|
13053
|
|
|
|
|
13054
|
|
|
/**
 * Splits the string by "$delimiter" and line-wraps every part on its own.
 *
 * The wrapped parts are joined again with the delimiter ("\n" if no delimiter was given).
 *
 * @param string      $str             <p>The input string.</p>
 * @param int         $width           [optional] <p>The column width.</p>
 * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
 * @param bool        $cut             [optional] <p>
 *                                     If the cut is set to true, the string is
 *                                     always wrapped at or before the specified width. So if you have
 *                                     a word that is larger than the given width, it is broken apart.
 *                                     </p>
 * @param bool        $add_final_break [optional] <p>
 *                                     If this flag is true, then the method will add a $break at the end
 *                                     of the result string.
 *                                     </p>
 * @param string|null $delimiter       [optional] <p>
 *                                     You can change the default behavior, where we split the string by newline.
 *                                     </p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function wordwrap_per_line(
    string $str,
    int $width = 75,
    string $break = "\n",
    bool $cut = false,
    bool $add_final_break = true,
    string $delimiter = null
): string {
    // without a delimiter every kind of newline separates the lines
    $lines = $delimiter === null
        ? \preg_split('/\\r\\n|\\r|\\n/', $str)
        : \explode($delimiter, $str);

    $wrapped_lines = [];
    if ($lines !== false) {
        foreach ($lines as $line) {
            $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
        }
    }

    $glue = $delimiter ?? "\n";
    $suffix = $add_final_break ? $break : '';

    return \implode($glue, $wrapped_lines) . $suffix;
}
|
13107
|
|
|
|
|
13108
|
|
|
/**
 * Returns the list of Unicode white space characters known to this class.
 *
 * @psalm-pure
 *
 * @return string[]
 *                  <p>The numeric code point as key and the UTF-8 encoded white space character as value.</p>
 */
public static function ws(): array
{
    $whitespace = self::$WHITESPACE;

    return $whitespace;
}
|
13120
|
|
|
|
|
13121
|
|
|
/**
 * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
 *
 * EXAMPLE: <code>
 * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
 * //
 * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
 * </code>
 *
 * @see http://hsivonen.iki.fi/php-utf8/
 *
 * @param string $str    <p>The string to be checked.</p>
 * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function is_utf8_string(string $str, bool $strict = false)
{
    if ($str === '') {
        return true;
    }

    // strict mode: binary data that is detected as UTF-16 / UTF-32 is never accepted as UTF-8
    if ($strict && self::is_binary($str, true)) {
        if (self::is_utf16($str, false) !== false) {
            return false;
        }

        if (self::is_utf32($str, false) !== false) {
            return false;
        }
    }

    if (self::$SUPPORT['pcre_utf8']) {
        // If even just the first character can be matched, when the /u
        // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
        // invalid, nothing at all will match, even if the string contains
        // some valid sequences
        return \preg_match('/^./us', $str) === 1;
    }

    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    // fallback: validate byte by byte
    $pending = 0; // continuation octets still expected for the current sequence
    $code_point = 0; // code point decoded so far
    $sequence_length = 1; // total number of octets of the current sequence

    $byte_count = \strlen($str);
    for ($pos = 0; $pos < $byte_count; ++$pos) {
        $byte = self::$ORD[$str[$pos]];

        if ($pending === 0) {
            // start of a character: US-ASCII or the first octet of a multi-octet sequence
            if (($byte & 0x80) === 0) {
                $sequence_length = 1;

                continue;
            }

            if (($byte & 0xE0) === 0xC0) {
                // first octet of a 2 octet sequence
                $code_point = ($byte & 0x1F) << 6;
                $pending = 1;
                $sequence_length = 2;

                continue;
            }

            if (($byte & 0xF0) === 0xE0) {
                // first octet of a 3 octet sequence
                $code_point = ($byte & 0x0F) << 12;
                $pending = 2;
                $sequence_length = 3;

                continue;
            }

            if (($byte & 0xF8) === 0xF0) {
                // first octet of a 4 octet sequence
                $code_point = ($byte & 0x07) << 18;
                $pending = 3;
                $sequence_length = 4;

                continue;
            }

            // Neither US-ASCII nor a legal first octet. This also covers the first octet
            // of a 5 or 6 octet sequence: such a sequence is illegal whatever follows
            // (complete, broken off or interrupted), so there is no need to read on.
            return false;
        }

        // inside a multi-octet sequence only a continuation octet (10xxxxxx) is legal
        if (($byte & 0xC0) !== 0x80) {
            return false;
        }

        --$pending;
        $code_point |= ($byte & 0x0000003F) << ($pending * 6);

        if ($pending !== 0) {
            continue;
        }

        // end of the sequence: $code_point now holds the final Unicode code point
        if (
            // From Unicode 3.1, non-shortest form is illegal
            ($sequence_length === 2 && $code_point < 0x0080)
            ||
            ($sequence_length === 3 && $code_point < 0x0800)
            ||
            ($sequence_length === 4 && $code_point < 0x10000)
            ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($code_point & 0xFFFFF800) === 0xD800)
            ||
            // Code points outside the Unicode range are illegal.
            ($code_point > 0x10FFFF)
        ) {
            return false;
        }

        // reset for the next character
        $code_point = 0;
        $sequence_length = 1;
    }

    // a sequence that is still open at the end of the string is invalid
    return $pending === 0;
}
|
13275
|
|
|
|
|
13276
|
|
|
/**
 * Replaces characters via the case-folding tables of this class.
 *
 * The "upper" / "lower" lists of the common case-folding table are always applied,
 * the table from the "caseFolding_full" data file only on request (it is loaded
 * once and then kept in a static variable).
 *
 * @param string $str
 * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
 * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function fixStrCaseHelper(
    string $str,
    bool $use_lowercase = false,
    bool $use_full_case_fold = false
) {
    $upper = self::$COMMON_CASE_FOLD['upper'];
    $lower = self::$COMMON_CASE_FOLD['lower'];

    $str = $use_lowercase
        ? \str_replace($upper, $lower, $str)
        : \str_replace($lower, $upper, $str);

    if (!$use_full_case_fold) {
        return $str;
    }

    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<mixed>|null
     */
    static $FULL_CASE_FOLD = null;
    if ($FULL_CASE_FOLD === null) {
        $FULL_CASE_FOLD = self::getData('caseFolding_full');
    }

    // lowercase mode replaces the entries of list 0 by those of list 1, uppercase mode the other way round
    $search_index = $use_lowercase ? 0 : 1;

    return \str_replace(
        $FULL_CASE_FOLD[$search_index],
        $FULL_CASE_FOLD[1 - $search_index],
        $str
    );
}
|
13329
|
|
|
|
|
13330
|
|
|
/**
 * Load a data table from "/data/*.php" (relative to this file's directory).
 *
 * The value returned is whatever the included file itself returns.
 *
 * @param string $file <p>Base name of the data file, without directory and without ".php".</p>
 *
 * @psalm-pure
 *
 * @return array
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function getData(string $file)
{
    $data_file = __DIR__ . '/data/' . $file . '.php';

    /** @noinspection PhpIncludeInspection */
    /** @noinspection UsingInclusionReturnValueInspection */
    /** @psalm-suppress UnresolvableInclude */
    return include $data_file;
}
|
13348
|
|
|
|
|
13349
|
|
|
/**
 * Build the emoji lookup caches on first use.
 *
 * Loads the "emoji" data file if needed, sorts it so that longer keys come
 * first, and fills the keys, values and "reversible" placeholder caches.
 *
 * @psalm-pure
 *
 * @return true|null
 *                   <p>true if the caches were built by this call, null if they already existed.</p>
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function initEmojiData()
{
    // Nothing to do when the key cache has already been populated.
    if (self::$EMOJI_KEYS_CACHE !== null) {
        return null;
    }

    if (self::$EMOJI === null) {
        self::$EMOJI = self::getData('emoji');
    }

    /**
     * @psalm-suppress ImpureFunctionCall - static sort function is used
     */
    \uksort(
        self::$EMOJI,
        // order by key byte-length, longest first
        static function (string $left, string $right): int {
            return \strlen($right) <=> \strlen($left);
        }
    );

    self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
    self::$EMOJI_VALUES_CACHE = self::$EMOJI;

    foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
        // placeholder built from the key's CRC32 and its reversed digits
        $checksum = \crc32($emoji_key);
        $reversed = \strrev((string) $checksum);

        self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . $reversed . '_-_8FTU_ELBATROP_-_';
    }

    return true;
}
|
13386
|
|
|
|
|
13387
|
|
|
/**
 * Checks whether mbstring "overloaded" is active on the server.
 *
 * Returns false straight away when the MB_OVERLOAD_STRING constant does not
 * exist; otherwise tests that bit in the "mbstring.func_overload" INI value.
 *
 * @psalm-pure
 *
 * @return bool
 */
private static function mbstring_overloaded(): bool
{
    /**
     * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
     */

    if (!\defined('MB_OVERLOAD_STRING')) {
        return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    /** @noinspection DeprecatedIniOptionsInspection */
    $overload_flags = (int) @\ini_get('mbstring.func_overload');

    return ($overload_flags & \MB_OVERLOAD_STRING) !== 0;
}
|
13407
|
|
|
|
|
13408
|
|
|
/**
 * Filter a list of strings, dropping short and/or blank entries.
 *
 * The result is re-indexed from 0; the order of the kept entries is unchanged.
 *
 * @param array    $strings             <p>The strings to filter.</p>
 * @param bool     $remove_empty_values <p>Drop entries that are empty after trim().</p>
 * @param int|null $remove_short_values <p>Drop entries whose mb_strlen() is less than or equal to this value;
 *                                      null disables the length check.</p>
 *
 * @psalm-pure
 *
 * @return array
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function reduce_string_array(
    array $strings,
    bool $remove_empty_values,
    int $remove_short_values = null
) {
    // init
    $return = [];

    // Iterate by value: the entries are only read, so a by-reference loop
    // variable would just leave a dangling reference behind.
    foreach ($strings as $str) {
        if (
            $remove_short_values !== null
            &&
            \mb_strlen($str) <= $remove_short_values
        ) {
            continue;
        }

        if (
            $remove_empty_values
            &&
            \trim($str) === ''
        ) {
            continue;
        }

        $return[] = $str;
    }

    return $return;
}
|
13449
|
|
|
|
|
13450
|
|
|
/**
 * rxClass
 *
 * Build a regex fragment that matches any one of the characters in $s,
 * optionally extended by an extra character-class fragment. Results are
 * memoized per ($s, $class) pair in a static local.
 *
 * @param string $s     <p>The characters to match.</p>
 * @param string $class <p>Extra content that is placed at the start of the character class.</p>
 *
 * @return string
 *                <p>A bracketed character class, or a "(?:…|…)" group when some entries
 *                cannot be put into the class.</p>
 *
 * @psalm-pure
 */
private static function rxClass(string $s, string $class = '')
{
    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<string,string>
     */
    static $RX_CLASS_CACHE = [];

    $cache_key = $s . '_' . $class;

    if (isset($RX_CLASS_CACHE[$cache_key])) {
        return $RX_CLASS_CACHE[$cache_key];
    }

    // Index 0 collects the character class; further entries are alternatives.
    $class_array = [$class];

    // Use a dedicated by-value loop variable instead of re-binding the $s
    // parameter by reference.
    foreach (self::str_split($s) as $char) {
        if ($char === '-') {
            // a hyphen goes to the front of the class
            $class_array[0] = '-' . $class_array[0];
        } elseif (!isset($char[2])) {
            // at most two bytes: escape regex meta characters
            $class_array[0] .= \preg_quote($char, '/');
        } elseif (self::strlen($char) === 1) {
            // one character of three or more bytes: append as is
            $class_array[0] .= $char;
        } else {
            // more than one character: keep as a separate alternative
            $class_array[] = $char;
        }
    }

    // Strict comparison: a class consisting only of "0" is falsy in PHP and
    // must still be wrapped in brackets.
    if ($class_array[0] !== '') {
        $class_array[0] = '[' . $class_array[0] . ']';
    }

    if (\count($class_array) === 1) {
        $return = $class_array[0];
    } else {
        $return = '(?:' . \implode('|', $class_array) . ')';
    }

    $RX_CLASS_CACHE[$cache_key] = $return;

    return $return;
}
|
13505
|
|
|
|
|
13506
|
|
|
/**
 * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
 *
 * Splits $names at $delimiter, upper-cases the first character of every part
 * via self::ucfirst() and joins the parts again. A part is left untouched when
 * - it is exactly one of the special-case names (e.g. "von", "de", "ibn"), or
 * - it starts with one of the special-case prefixes (e.g. "mc", "d'"), or
 * - the delimiter is "-" and it starts with one of the special-case names.
 *
 * @param string $names     <p>The name(s) to process.</p>
 * @param string $delimiter <p>The separator between the name parts; an empty delimiter yields "".</p>
 * @param string $encoding  <p>Passed through to self::ucfirst().</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function str_capitalize_name_helper(
    string $names,
    string $delimiter,
    string $encoding = 'UTF-8'
) {
    // explode() cannot split on an empty delimiter (false + warning on PHP 7,
    // ValueError on PHP 8), so return the empty-string fallback explicitly.
    if ($delimiter === '') {
        return '';
    }

    $special_cases = [
        'names' => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // Beginnings that keep a part from being capitalized. The special names
    // only count as beginnings when splitting at "-".
    $protected_beginnings = $special_cases['prefixes'];
    if ($delimiter === '-') {
        $protected_beginnings = \array_merge($special_cases['names'], $protected_beginnings);
    }

    $name_helper_array = \explode($delimiter, $names);

    foreach ($name_helper_array as $index => $name) {
        if (\in_array($name, $special_cases['names'], true)) {
            continue;
        }

        foreach ($protected_beginnings as $beginning) {
            if (\strncmp($name, $beginning, \strlen($beginning)) === 0) {
                // skip this name part entirely
                continue 2;
            }
        }

        $name_helper_array[$index] = self::ucfirst($name, $encoding);
    }

    return \implode($delimiter, $name_helper_array);
}
|
13606
|
|
|
|
|
13607
|
|
|
/**
 * Generic case-sensitive transformation for collation matching.
 *
 * Normalizes the input to NFD and then removes every run of characters with
 * the Unicode property "Mn" (nonspacing marks).
 *
 * @param string $str <p>The input string</p>
 *
 * @psalm-pure
 *
 * @return string|null
 *                     <p>The folded string, "" if the normalization failed, or whatever
 *                     preg_replace() returns on error.</p>
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function strtonatfold(string $str)
{
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // normalize() signals failure with false
    return $decomposed === false
        ? ''
        : \preg_replace('/\p{Mn}+/u', '', $decomposed);
}
|
13631
|
|
|
|
|
13632
|
|
|
/**
 * Convert one input unit to its UTF-8 byte sequence.
 *
 * The value is looked up in the "ord" table; if the resulting ordinal has a
 * Windows-1252 special case, that mapping is returned. Otherwise a two-byte
 * sequence is assembled with string bitwise operations.
 *
 * The "ord", "chr" and "win1252_to_utf8" tables are loaded on first use.
 *
 * @param int|string $input <p>Key into the "ord" table (presumably a single byte — verify against the callers).</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function to_utf8_convert_helper($input)
{
    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
        return '' . self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Cast the quotient explicitly: a fractional float used as an array
    // offset is truncated implicitly, which is deprecated since PHP 8.1.
    $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
    $cc2 = ((string) $input & "\x3F") | "\x80";

    return '' . $cc1 . $cc2;
}
|
13670
|
|
|
|
|
13671
|
|
|
/**
 * Rewrite "%uXXXX" escapes (3 or 4 hex digits) as hexadecimal HTML entities ("&#xXXXX;").
 *
 * Strings that do not contain "%u", or contain it without valid hex digits,
 * are returned unchanged.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection ReturnTypeCanBeDeclaredInspection
 */
private static function urldecode_unicode_helper(string $str)
{
    // cheap substring test first, so the regex engine is only used when needed
    if (\strpos($str, '%u') !== false) {
        $unicode_escape = '/%u([0-9a-fA-F]{3,4})/';

        if (\preg_match($unicode_escape, $str)) {
            return (string) \preg_replace($unicode_escape, '&#x\\1;', $str);
        }
    }

    return $str;
}
|
13693
|
|
|
} |
|
13694
|
|
|
|