1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace voku\helper; |
6
|
|
|
|
7
|
|
|
final class UTF8 |
8
|
|
|
{ |
9
|
|
|
// (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control]) |
10
|
|
|
// This regular expression is a work around for http://bugs.exim.org/1279 |
11
|
|
|
const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])"; |
12
|
|
|
|
13
|
|
|
/** |
14
|
|
|
* Bom => Byte-Length |
15
|
|
|
* |
16
|
|
|
* INFO: https://en.wikipedia.org/wiki/Byte_order_mark |
17
|
|
|
* |
18
|
|
|
* @var array |
19
|
|
|
*/ |
20
|
|
|
private static $BOM = [ |
21
|
|
|
"\xef\xbb\xbf" => 3, // UTF-8 BOM |
22
|
|
|
'' => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...) |
23
|
|
|
"\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM |
24
|
|
|
' þÿ' => 6, // UTF-32 (BE) BOM as "WINDOWS-1252" |
25
|
|
|
"\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM |
26
|
|
|
'ÿþ ' => 6, // UTF-32 (LE) BOM as "WINDOWS-1252" |
27
|
|
|
"\xfe\xff" => 2, // UTF-16 (BE) BOM |
28
|
|
|
'þÿ' => 4, // UTF-16 (BE) BOM as "WINDOWS-1252" |
29
|
|
|
"\xff\xfe" => 2, // UTF-16 (LE) BOM |
30
|
|
|
'ÿþ' => 4, // UTF-16 (LE) BOM as "WINDOWS-1252" |
31
|
|
|
]; |
32
|
|
|
|
33
|
|
|
/** |
34
|
|
|
* Numeric code point => UTF-8 Character |
35
|
|
|
* |
36
|
|
|
* url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp |
37
|
|
|
* |
38
|
|
|
* @var array |
39
|
|
|
*/ |
40
|
|
|
private static $WHITESPACE = [ |
41
|
|
|
// NUL Byte |
42
|
|
|
0 => "\x0", |
43
|
|
|
// Tab |
44
|
|
|
9 => "\x9", |
45
|
|
|
// New Line |
46
|
|
|
10 => "\xa", |
47
|
|
|
// Vertical Tab |
48
|
|
|
11 => "\xb", |
49
|
|
|
// Carriage Return |
50
|
|
|
13 => "\xd", |
51
|
|
|
// Ordinary Space |
52
|
|
|
32 => "\x20", |
53
|
|
|
// NO-BREAK SPACE |
54
|
|
|
160 => "\xc2\xa0", |
55
|
|
|
// OGHAM SPACE MARK |
56
|
|
|
5760 => "\xe1\x9a\x80", |
57
|
|
|
// MONGOLIAN VOWEL SEPARATOR |
58
|
|
|
6158 => "\xe1\xa0\x8e", |
59
|
|
|
// EN QUAD |
60
|
|
|
8192 => "\xe2\x80\x80", |
61
|
|
|
// EM QUAD |
62
|
|
|
8193 => "\xe2\x80\x81", |
63
|
|
|
// EN SPACE |
64
|
|
|
8194 => "\xe2\x80\x82", |
65
|
|
|
// EM SPACE |
66
|
|
|
8195 => "\xe2\x80\x83", |
67
|
|
|
// THREE-PER-EM SPACE |
68
|
|
|
8196 => "\xe2\x80\x84", |
69
|
|
|
// FOUR-PER-EM SPACE |
70
|
|
|
8197 => "\xe2\x80\x85", |
71
|
|
|
// SIX-PER-EM SPACE |
72
|
|
|
8198 => "\xe2\x80\x86", |
73
|
|
|
// FIGURE SPACE |
74
|
|
|
8199 => "\xe2\x80\x87", |
75
|
|
|
// PUNCTUATION SPACE |
76
|
|
|
8200 => "\xe2\x80\x88", |
77
|
|
|
// THIN SPACE |
78
|
|
|
8201 => "\xe2\x80\x89", |
79
|
|
|
//HAIR SPACE |
80
|
|
|
8202 => "\xe2\x80\x8a", |
81
|
|
|
// LINE SEPARATOR |
82
|
|
|
8232 => "\xe2\x80\xa8", |
83
|
|
|
// PARAGRAPH SEPARATOR |
84
|
|
|
8233 => "\xe2\x80\xa9", |
85
|
|
|
// NARROW NO-BREAK SPACE |
86
|
|
|
8239 => "\xe2\x80\xaf", |
87
|
|
|
// MEDIUM MATHEMATICAL SPACE |
88
|
|
|
8287 => "\xe2\x81\x9f", |
89
|
|
|
// IDEOGRAPHIC SPACE |
90
|
|
|
12288 => "\xe3\x80\x80", |
91
|
|
|
]; |
92
|
|
|
|
93
|
|
|
/** |
94
|
|
|
* @var array |
95
|
|
|
*/ |
96
|
|
|
private static $WHITESPACE_TABLE = [ |
97
|
|
|
'SPACE' => "\x20", |
98
|
|
|
'NO-BREAK SPACE' => "\xc2\xa0", |
99
|
|
|
'OGHAM SPACE MARK' => "\xe1\x9a\x80", |
100
|
|
|
'EN QUAD' => "\xe2\x80\x80", |
101
|
|
|
'EM QUAD' => "\xe2\x80\x81", |
102
|
|
|
'EN SPACE' => "\xe2\x80\x82", |
103
|
|
|
'EM SPACE' => "\xe2\x80\x83", |
104
|
|
|
'THREE-PER-EM SPACE' => "\xe2\x80\x84", |
105
|
|
|
'FOUR-PER-EM SPACE' => "\xe2\x80\x85", |
106
|
|
|
'SIX-PER-EM SPACE' => "\xe2\x80\x86", |
107
|
|
|
'FIGURE SPACE' => "\xe2\x80\x87", |
108
|
|
|
'PUNCTUATION SPACE' => "\xe2\x80\x88", |
109
|
|
|
'THIN SPACE' => "\xe2\x80\x89", |
110
|
|
|
'HAIR SPACE' => "\xe2\x80\x8a", |
111
|
|
|
'LINE SEPARATOR' => "\xe2\x80\xa8", |
112
|
|
|
'PARAGRAPH SEPARATOR' => "\xe2\x80\xa9", |
113
|
|
|
'ZERO WIDTH SPACE' => "\xe2\x80\x8b", |
114
|
|
|
'NARROW NO-BREAK SPACE' => "\xe2\x80\xaf", |
115
|
|
|
'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f", |
116
|
|
|
'IDEOGRAPHIC SPACE' => "\xe3\x80\x80", |
117
|
|
|
]; |
118
|
|
|
|
119
|
|
|
/** |
120
|
|
|
* bidirectional text chars |
121
|
|
|
* |
122
|
|
|
* url: https://www.w3.org/International/questions/qa-bidi-unicode-controls |
123
|
|
|
* |
124
|
|
|
* @var array |
125
|
|
|
*/ |
126
|
|
|
private static $BIDI_UNI_CODE_CONTROLS_TABLE = [ |
127
|
|
|
// LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr") |
128
|
|
|
8234 => "\xE2\x80\xAA", |
129
|
|
|
// RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl") |
130
|
|
|
8235 => "\xE2\x80\xAB", |
131
|
|
|
// POP DIRECTIONAL FORMATTING // (use -> </bdo>) |
132
|
|
|
8236 => "\xE2\x80\xAC", |
133
|
|
|
// LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">) |
134
|
|
|
8237 => "\xE2\x80\xAD", |
135
|
|
|
// RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">) |
136
|
|
|
8238 => "\xE2\x80\xAE", |
137
|
|
|
// LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr") |
138
|
|
|
8294 => "\xE2\x81\xA6", |
139
|
|
|
// RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl") |
140
|
|
|
8295 => "\xE2\x81\xA7", |
141
|
|
|
// FIRST STRONG ISOLATE // (use -> dir = "auto") |
142
|
|
|
8296 => "\xE2\x81\xA8", |
143
|
|
|
// POP DIRECTIONAL ISOLATE |
144
|
|
|
8297 => "\xE2\x81\xA9", |
145
|
|
|
]; |
146
|
|
|
|
147
|
|
|
/** |
148
|
|
|
* @var array |
149
|
|
|
*/ |
150
|
|
|
private static $COMMON_CASE_FOLD = [ |
151
|
|
|
'upper' => [ |
152
|
|
|
'µ', |
153
|
|
|
'ſ', |
154
|
|
|
"\xCD\x85", |
155
|
|
|
'ς', |
156
|
|
|
'ẞ', |
157
|
|
|
"\xCF\x90", |
158
|
|
|
"\xCF\x91", |
159
|
|
|
"\xCF\x95", |
160
|
|
|
"\xCF\x96", |
161
|
|
|
"\xCF\xB0", |
162
|
|
|
"\xCF\xB1", |
163
|
|
|
"\xCF\xB5", |
164
|
|
|
"\xE1\xBA\x9B", |
165
|
|
|
"\xE1\xBE\xBE", |
166
|
|
|
], |
167
|
|
|
'lower' => [ |
168
|
|
|
'μ', |
169
|
|
|
's', |
170
|
|
|
'ι', |
171
|
|
|
'σ', |
172
|
|
|
'ß', |
173
|
|
|
'β', |
174
|
|
|
'θ', |
175
|
|
|
'φ', |
176
|
|
|
'π', |
177
|
|
|
'κ', |
178
|
|
|
'ρ', |
179
|
|
|
'ε', |
180
|
|
|
"\xE1\xB9\xA1", |
181
|
|
|
'ι', |
182
|
|
|
], |
183
|
|
|
]; |
184
|
|
|
|
185
|
|
|
/** |
186
|
|
|
* @var array |
187
|
|
|
*/ |
188
|
|
|
private static $SUPPORT = []; |
189
|
|
|
|
190
|
|
|
/** |
191
|
|
|
* @var array|null |
192
|
|
|
*/ |
193
|
|
|
private static $BROKEN_UTF8_FIX; |
194
|
|
|
|
195
|
|
|
/** |
196
|
|
|
* @var array|null |
197
|
|
|
*/ |
198
|
|
|
private static $WIN1252_TO_UTF8; |
199
|
|
|
|
200
|
|
|
/** |
201
|
|
|
* @var array|null |
202
|
|
|
*/ |
203
|
|
|
private static $INTL_TRANSLITERATOR_LIST; |
204
|
|
|
|
205
|
|
|
/** |
206
|
|
|
* @var array|null |
207
|
|
|
*/ |
208
|
|
|
private static $ENCODINGS; |
209
|
|
|
|
210
|
|
|
/** |
211
|
|
|
* @var array|null |
212
|
|
|
*/ |
213
|
|
|
private static $ORD; |
214
|
|
|
|
215
|
|
|
/** |
216
|
|
|
* @var array|null |
217
|
|
|
*/ |
218
|
|
|
private static $EMOJI; |
219
|
|
|
|
220
|
|
|
/** |
221
|
|
|
* @var array|null |
222
|
|
|
*/ |
223
|
|
|
private static $EMOJI_VALUES_CACHE; |
224
|
|
|
|
225
|
|
|
/** |
226
|
|
|
* @var array|null |
227
|
|
|
*/ |
228
|
|
|
private static $EMOJI_KEYS_CACHE; |
229
|
|
|
|
230
|
|
|
/** |
231
|
|
|
* @var array|null |
232
|
|
|
*/ |
233
|
|
|
private static $EMOJI_KEYS_REVERSIBLE_CACHE; |
234
|
|
|
|
235
|
|
|
/** |
236
|
|
|
* @var array|null |
237
|
|
|
*/ |
238
|
|
|
private static $CHR; |
239
|
|
|
|
240
|
|
|
/**
 * Intentionally empty public constructor.
 *
 * No instance state is initialised here.
 */
public function __construct()
{
}
246
|
|
|
|
247
|
|
|
/**
 * Fetch the single character located at a given position: $str[1] like functionality.
 *
 * @param string $str      <p>A UTF-8 string.</p>
 * @param int    $pos      <p>The position of the character to return.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string <p>The character at $pos, or an empty string when $str is empty or $pos is negative.</p>
 */
public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
{
    $hasUsableInput = $str !== '' && $pos >= 0;
    if (!$hasUsableInput) {
        return '';
    }

    // "UTF-8" takes the direct mb_substr() route, every other charset goes through self::substr()
    $char = $encoding === 'UTF-8'
        ? \mb_substr($str, $pos, 1)
        : self::substr($str, $pos, 1, $encoding);

    return (string) $char;
}
268
|
|
|
|
269
|
|
|
/**
 * Returns the string with the UTF-8 BOM prepended.
 *
 * INFO: If self::string_has_bom() does not report false for the input, the input is returned unchanged.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string <p>The string, starting with a BOM.</p>
 */
public static function add_bom_to_string(string $str): string
{
    if (self::string_has_bom($str) !== false) {
        // nothing to add
        return $str;
    }

    return self::bom() . $str;
}
286
|
|
|
|
287
|
|
|
/**
 * Changes the case of all keys in an array.
 *
 * Keys are cast to string and converted via self::strtolower() / self::strtoupper(); the values are
 * copied over unchanged. If two keys become equal after conversion, the later entry overwrites the
 * earlier one.
 *
 * @param array  $array    <p>The array to work on</p>
 * @param int    $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
 *                         or <strong>CASE_LOWER</strong> (default); any other value is treated as
 *                         <strong>CASE_LOWER</strong></p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return array <p>The values of $array, indexed by the lower- or upper-cased keys.</p>
 */
public static function array_change_key_case(array $array, int $case = \CASE_LOWER, string $encoding = 'UTF-8'): array
{
    // everything that is not explicitly CASE_UPPER falls back to lower-casing
    $toUpper = $case === \CASE_UPPER;

    $return = [];
    // iterate by value: the elements are only read, so no reference is needed
    foreach ($array as $key => $value) {
        $key = $toUpper
            ? self::strtoupper((string) $key, $encoding)
            : self::strtolower((string) $key, $encoding);

        $return[$key] = $value;
    }

    return $return;
}
318
|
|
|
|
319
|
|
|
/**
 * Extracts the text enclosed by $start and $end.
 *
 * The search for $start begins at $offset; the search for $end begins right behind the found $start.
 * An empty string is returned if either delimiter is not found or if nothing lies between them.
 *
 * @param string $str
 * @param string $start    <p>Delimiter marking the start of the substring.</p>
 * @param string $end      <p>Delimiter marking the end of the substring.</p>
 * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 */
public static function between(
    string $str,
    string $start,
    string $end,
    int $offset = 0,
    string $encoding = 'UTF-8'
): string {
    if ($encoding !== 'UTF-8') {
        // generic route: normalize the charset name and use the encoding-aware helpers of this class
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $startAt = self::strpos($str, $start, $offset, $encoding);
        if ($startAt === false) {
            return '';
        }

        $from = $startAt + (int) self::strlen($start, $encoding);
        $endAt = self::strpos($str, $end, $from, $encoding);
        if ($endAt === false || $endAt === $from) {
            return '';
        }

        return (string) self::substr($str, $from, $endAt - $from, $encoding);
    }

    // "UTF-8" route: plain "mb_" functions
    $startAt = \mb_strpos($str, $start, $offset);
    if ($startAt === false) {
        return '';
    }

    $from = $startAt + (int) \mb_strlen($start);
    $endAt = \mb_strpos($str, $end, $from);
    if ($endAt === false || $endAt === $from) {
        return '';
    }

    return (string) \mb_substr($str, $from, $endAt - $from);
}
382
|
|
|
|
383
|
|
|
/**
 * Convert a binary number, given as a string of "0" / "1" digits, into the corresponding byte string.
 *
 * The digits are processed in groups of eight instead of being routed through base_convert(), so
 * inputs of any length are converted without precision loss. The first (possibly incomplete) group
 * is left-padded with zeros, which keeps leading zero bits of the first byte intact.
 *
 * @param mixed $bin <p>String of 1|0 digits; characters other than "0" and "1" are ignored.</p>
 *
 * @return string <p>The decoded bytes; an empty string for non-string input, for empty input and
 *                for a value of zero.</p>
 */
public static function binary_to_str($bin): string
{
    if (!\is_string($bin) || $bin === '') {
        return '';
    }

    // base_convert() skipped characters that are not valid digits, keep that leniency
    $bits = \ltrim((string) \preg_replace('/[^01]+/', '', $bin), '0');
    if ($bits === '') {
        // the value is zero (or there were no digits at all)
        return '';
    }

    // left-pad to whole bytes, so that every group of eight digits maps to exactly one byte
    $missing = (8 - \strlen($bits) % 8) % 8;
    $bits = \str_repeat('0', $missing) . $bits;

    $bytes = '';
    foreach (\str_split($bits, 8) as $octet) {
        $bytes .= \chr((int) \bindec($octet));
    }

    return $bytes;
}
403
|
|
|
|
404
|
|
|
/**
 * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
 *
 * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
 *
 * @return string <p>UTF-8 Byte Order Mark</p>
 */
public static function bom(): string
{
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
}
415
|
|
|
|
416
|
|
|
/**
 * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
 *
 * @param array|string $callback
 * @param string       $str
 *
 * @return string[]
 *
 * @see UTF8::chr_map()
 */
public static function callback($callback, string $str): array
{
    $mapped = self::chr_map($callback, $str);

    return $mapped;
}
430
|
|
|
|
431
|
|
|
/**
 * Returns the character at $index, with indexes starting at 0.
 *
 * Unlike UTF8::access(), $index is handed to the substring function as-is (no check for negative values).
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $index    <p>Position of the character.</p>
 * @param string $encoding [optional] <p>Default is UTF-8</p>
 *
 * @return string <p>The character at $index.</p>
 */
public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
{
    $char = $encoding === 'UTF-8'
        ? \mb_substr($str, $index, 1)
        : self::substr($str, $index, 1, $encoding);

    return (string) $char;
}
448
|
|
|
|
449
|
|
|
/**
 * Splits the string into its characters; a thin wrapper around UTF8::str_split().
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string[] <p>An array of chars.</p>
 */
public static function chars(string $str): array
{
    $chars = self::str_split($str);

    return $chars;
}
460
|
|
|
|
461
|
|
|
/**
 * Detects, once, which UTF-8 related features are available and stores the results in self::$SUPPORT.
 *
 * On the first call the "mbstring", "iconv", "intl", "intlChar", "ctype", "finfo", "json", "pcre_utf8"
 * and "symfony_polyfill_used" entries are filled, and the mbstring internal encoding is switched to
 * UTF-8 when mbstring (or the polyfill) is reported as available. Later calls do nothing.
 *
 * @return true|null <p><strong>true</strong> when the detection ran, <strong>null</strong> when it
 *                   had already been done before.</p>
 *
 * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
 */
public static function checkForSupport()
{
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        // already detected
        return null;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
    if (self::$SUPPORT['mbstring'] === true) {
        \mb_internal_encoding('UTF-8');
        /** @noinspection UnusedFunctionResultInspection */
        /** @noinspection PhpComposerExtensionStubsInspection */
        \mb_regex_encoding('UTF-8');
        self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
    }

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

    // the polyfill provides "mb_internal_encoding()" as well, so configure it in the same way
    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
        \mb_internal_encoding('UTF-8');
        self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
    }

    return true;
}
516
|
|
|
|
517
|
|
|
/**
 * Generates a character from the given code point.
 *
 * INFO: opposite to UTF8::ord()
 *
 * The character is built as UTF-8 (lookup table for code points up to 127, then IntlChar if
 * available, otherwise manual bit-shifting) and is passed through self::encode() when another
 * target encoding is requested. Results are memoized per code point + encoding.
 *
 * @param int|string $code_point <p>The code point for which to generate a character.</p>
 * @param string     $encoding   [optional] <p>Default is UTF-8</p>
 *
 * @return string|null <p>Multi-byte character, returns null on failure or empty input.</p>
 */
public static function chr($code_point, string $encoding = 'UTF-8')
{
    // memo: "<code point><encoding>" => character
    static $CHAR_CACHE = [];

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $isUtf8 = $encoding === 'UTF-8';

    // any other charset than these three needs mbstring for the conversion step
    if (
        !$isUtf8
        &&
        !\in_array($encoding, ['ISO-8859-1', 'WINDOWS-1252'], true)
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey])) {
        return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 127) { // use "simple"-char only until "\x80"

        if (null === self::$CHR) {
            self::$CHR = self::getData('chr');
        }

        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $simpleChar = self::$CHR[$code_point];

        if (!$isUtf8) {
            $simpleChar = self::encode($encoding, $simpleChar);
        }

        $CHAR_CACHE[$cacheKey] = $simpleChar;

        return $simpleChar;
    }

    //
    // fallback via "IntlChar"
    //

    if (self::$SUPPORT['intlChar'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $intlChar = \IntlChar::chr($code_point);

        if (!$isUtf8) {
            $intlChar = self::encode($encoding, $intlChar);
        }

        $CHAR_CACHE[$cacheKey] = $intlChar;

        return $intlChar;
    }

    //
    // fallback via vanilla php
    //

    if (null === self::$CHR) {
        self::$CHR = self::getData('chr');
    }

    // from here on the code point is handled as an integer and split into UTF-8 lead / continuation bytes
    $code_point = (int) $code_point;
    if ($code_point <= 0x7F) {
        // 1 byte
        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $utf8Char = self::$CHR[$code_point];
    } elseif ($code_point <= 0x7FF) {
        // 2 bytes
        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $utf8Char = self::$CHR[($code_point >> 6) + 0xC0] .
                    self::$CHR[($code_point & 0x3F) + 0x80];
    } elseif ($code_point <= 0xFFFF) {
        // 3 bytes
        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $utf8Char = self::$CHR[($code_point >> 12) + 0xE0] .
                    self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                    self::$CHR[($code_point & 0x3F) + 0x80];
    } else {
        // 4 bytes
        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $utf8Char = self::$CHR[($code_point >> 18) + 0xF0] .
                    self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                    self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                    self::$CHR[($code_point & 0x3F) + 0x80];
    }

    if (!$isUtf8) {
        $utf8Char = self::encode($encoding, $utf8Char);
    }

    $CHAR_CACHE[$cacheKey] = $utf8Char;

    return $utf8Char;
}
629
|
|
|
|
630
|
|
|
/**
 * Applies a callback to every character of a string.
 *
 * The string is split with UTF8::str_split() and the pieces are passed to array_map().
 *
 * @param array|string $callback <p>The callback function.</p>
 * @param string       $str      <p>UTF-8 string to run callback on.</p>
 *
 * @return string[] <p>The outcome of the callback, one entry per character.</p>
 */
public static function chr_map($callback, string $str): array
{
    $chars = self::str_split($str);

    return \array_map($callback, $chars);
}
645
|
|
|
|
646
|
|
|
/**
 * Generates an array with the byte length of each character of a Unicode string.
 *
 * 1 byte => U+0000  - U+007F
 * 2 byte => U+0080  - U+07FF
 * 3 byte => U+0800  - U+FFFF
 * 4 byte => U+10000 - U+10FFFF
 *
 * @param string $str <p>The original unicode string.</p>
 *
 * @return int[] <p>An array of byte lengths of each character; empty for an empty string.</p>
 */
public static function chr_size_list(string $str): array
{
    if ($str === '') {
        return [];
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
        // "mb_" is available if overload is used, so use it ...
        $byteLength = static function (string $data): int {
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };
    } else {
        $byteLength = '\strlen';
    }

    return \array_map($byteLength, self::str_split($str));
}
676
|
|
|
|
677
|
|
|
/**
 * Get a decimal code representation of a specific character.
 *
 * The first byte decides how many bytes belong to the character (1 to 4); the payload bits of the
 * following bytes are then appended, six bits per byte.
 *
 * NOTE(review): $char is indexed without a length check, so an empty string or a truncated
 * multi-byte sequence reads an uninitialized string offset - confirm whether callers can pass such input.
 *
 * @param string $char <p>The input character.</p>
 *
 * @return int
 */
public static function chr_to_decimal(string $char): int
{
    $lead = self::ord($char[0]);

    if (!($lead & 0x80)) {
        // 0xxxxxxx
        return $lead;
    }

    if (($lead & 0xe0) === 0xc0) {
        // 110xxxxx
        $length = 2;
        $value = $lead & ~0xc0;
    } elseif (($lead & 0xf0) === 0xe0) {
        // 1110xxxx
        $length = 3;
        $value = $lead & ~0xe0;
    } elseif (($lead & 0xf8) === 0xf0) {
        // 11110xxx
        $length = 4;
        $value = $lead & ~0xf0;
    } else {
        // no known lead-byte pattern: the value of the first byte is returned as-is
        $length = 1;
        $value = $lead;
    }

    for ($offset = 1; $offset < $length; ++$offset) {
        // 10xxxxxx
        $value = ($value << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $value;
}
715
|
|
|
|
716
|
|
|
/**
 * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
 *
 * The code point is determined via UTF8::ord() and formatted via UTF8::int_to_hex().
 *
 * @param int|string $char <p>The input character</p>
 * @param string     $pfix [optional] <p>Prefix that is handed to UTF8::int_to_hex().</p>
 *
 * @return string <p>The code point encoded as U+xxxx; an empty string for an empty-string input.</p>
 */
public static function chr_to_hex($char, string $pfix = 'U+'): string
{
    if ($char === '') {
        return '';
    }

    // NOTE(review): this literal shows up as U+FFFD in this copy of the file - confirm that it was
    // not garbled during extraction.
    $input = $char === '�' ? '' : $char;

    return self::int_to_hex(self::ord((string) $input), $pfix);
}
736
|
|
|
|
737
|
|
|
/**
 * Alias for "UTF8::chr_to_decimal()": the argument is forwarded unchanged.
 *
 * @param string $chr
 *
 * @return int
 *
 * @see UTF8::chr_to_decimal()
 */
public static function chr_to_int(string $chr): int
{
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
}
750
|
|
|
|
751
|
|
|
/**
 * Splits a string into smaller chunks and joins them again with the specified line ending.
 *
 * The chunks come from UTF8::str_split() and are glued together with implode(), so $end is placed
 * between the chunks.
 *
 * @param string $body     <p>The original string to be split.</p>
 * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
 * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
 *
 * @return string <p>The chunked string.</p>
 */
public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
{
    $chunks = self::str_split($body, $chunklen);

    return \implode($end, $chunks);
}
764
|
|
|
|
765
|
|
|
/**
 * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
 *
 * The byte-level filter always runs first; the optional steps follow in this fixed order:
 * diamond question mark, invisible characters, whitespace, MS Word chars, BOM.
 *
 * @param string $str                           <p>The string to be sanitized.</p>
 * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
 * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
 *                                              whitespace.</p>
 * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
 *                                              e.g.: "…" => "..."</p>
 * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
 *                                              combination with $normalize_whitespace</p>
 * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
 *                                              mark e.g.: "�"</p>
 * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you do not want to remove invisible
 *                                              characters e.g.: "\0"</p>
 *
 * @return string <p>Clean UTF-8 encoded string.</p>
 */
public static function clean(
    string $str,
    bool $remove_bom = false,
    bool $normalize_whitespace = false,
    bool $normalize_msword = false,
    bool $keep_non_breaking_space = false,
    bool $replace_diamond_question_mark = false,
    bool $remove_invisible_characters = true
): string {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Extended ("x") pattern: group 1 captures runs of well-formed sequences, the other two
    // alternatives match a single stray byte. Replacing with "$1" keeps group 1 and drops stray bytes.
    $validUtf8Pattern = '/
(
(?: [\x00-\x7F] # single-byte sequences 0xxxxxxx
| [\xC0-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx
| [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences 1110xxxx 10xxxxxx * 2
| [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
){1,100} # ...one or more times
)
| ( [\x80-\xBF] ) # invalid byte in range 10000000 - 10111111
| ( [\xC0-\xFF] ) # invalid byte in range 11000000 - 11111111
/x';
    /** @noinspection NotOptimalRegularExpressionsInspection */
    $cleaned = (string) \preg_replace($validUtf8Pattern, '$1', $str);

    if ($replace_diamond_question_mark) {
        $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
        $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
        $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
        $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
        $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
}
833
|
|
|
|
834
|
|
|
/**
 * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
 *
 * The input is cast to string, repaired with UTF8::fix_simple_utf8() and then handed to UTF8::clean().
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string <p>The cleaned string; an empty string for empty input.</p>
 */
public static function cleanup($str): string
{
    $input = (string) $str;
    if ($input === '') {
        return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($input);

    return self::clean(
        $fixed,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // (MS Word chars are left as they are)
        true,  // ... but keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
}
868
|
|
|
|
869
|
|
|
/**
 * Accepts a string or an array of strings and returns an array of Unicode code points.
 *
 * INFO: opposite to UTF8::string()
 *
 * A string is split into characters first; every entry is then mapped through UTF8::ord() and,
 * for $u_style, additionally through UTF8::int_to_hex().
 *
 * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
 * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
 *                                 default, code points will be returned as integers.</p>
 *
 * @return array<int|string>
 *                           The array of code points:<br>
 *                           array<int> for $u_style === false<br>
 *                           array<string> for $u_style === true<br>
 */
public static function codepoints($arg, bool $u_style = false): array
{
    $chars = \is_string($arg) === true ? self::str_split($arg) : $arg;

    $points = \array_map([self::class, 'ord'], $chars);
    if ($points === []) {
        return [];
    }

    if (!$u_style) {
        return $points;
    }

    return \array_map([self::class, 'int_to_hex'], $points);
}
913
|
|
|
|
914
|
|
|
/**
 * Trims the string and replaces consecutive whitespace characters with a
 * single space. This includes tabs and newline characters, as well as
 * multibyte whitespace such as the thin space and ideographic space.
 *
 * Uses mb_ereg_replace() when mbstring is flagged as available, otherwise UTF8::regex_replace().
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string <p>String with a trimmed $str and condensed whitespace.</p>
 */
public static function collapse_whitespace(string $str): string
{
    if (self::$SUPPORT['mbstring'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);
    } else {
        $collapsed = self::regex_replace($str, '[[:space:]]+', ' ');
    }

    return \trim($collapsed);
}
932
|
|
|
|
933
|
|
|
/** |
934
|
|
|
* Returns count of characters used in a string. |
935
|
|
|
* |
936
|
|
|
* @param string $str <p>The input string.</p> |
937
|
|
|
* @param bool $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p> |
938
|
|
|
* @param bool $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use |
939
|
|
|
* |
940
|
|
|
* @return int[] an associative array of Character as keys and |
941
|
|
|
* their count as values |
942
|
|
|
*/ |
943
|
19 |
|
public static function count_chars( |
944
|
|
|
string $str, |
945
|
|
|
bool $cleanUtf8 = false, |
946
|
|
|
bool $tryToUseMbFunction = true |
947
|
|
|
): array { |
948
|
19 |
|
return \array_count_values( |
949
|
19 |
|
self::str_split( |
950
|
19 |
|
$str, |
951
|
19 |
|
1, |
952
|
19 |
|
$cleanUtf8, |
953
|
19 |
|
$tryToUseMbFunction |
954
|
|
|
) |
955
|
|
|
); |
956
|
|
|
} |
957
|
|
|
|
958
|
|
|
/** |
959
|
|
|
* Remove css media-queries. |
960
|
|
|
* |
961
|
|
|
* @param string $str |
962
|
|
|
* |
963
|
|
|
* @return string |
964
|
|
|
*/ |
965
|
1 |
|
public static function css_stripe_media_queries(string $str): string |
966
|
|
|
{ |
967
|
1 |
|
return (string) \preg_replace( |
968
|
1 |
|
'#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU', |
969
|
1 |
|
'', |
970
|
1 |
|
$str |
971
|
|
|
); |
972
|
|
|
} |
973
|
|
|
|
974
|
|
|
/** |
975
|
|
|
* Checks whether ctype is available on the server. |
976
|
|
|
* |
977
|
|
|
* @return bool |
978
|
|
|
* <strong>true</strong> if available, <strong>false</strong> otherwise |
979
|
|
|
*/ |
980
|
|
|
public static function ctype_loaded(): bool |
981
|
|
|
{ |
982
|
|
|
return \extension_loaded('ctype'); |
983
|
|
|
} |
984
|
|
|
|
985
|
|
|
/** |
986
|
|
|
* Converts a int-value into an UTF-8 character. |
987
|
|
|
* |
988
|
|
|
* @param mixed $int |
989
|
|
|
* |
990
|
|
|
* @return string |
991
|
|
|
*/ |
992
|
19 |
|
public static function decimal_to_chr($int): string |
993
|
|
|
{ |
994
|
19 |
|
return self::html_entity_decode('&#' . $int . ';', \ENT_QUOTES | \ENT_HTML5); |
995
|
|
|
} |
996
|
|
|
|
997
|
|
|
/** |
998
|
|
|
* Decodes a MIME header field |
999
|
|
|
* |
1000
|
|
|
* @param string $str |
1001
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
1002
|
|
|
* |
1003
|
|
|
* @return false|string |
1004
|
|
|
* A decoded MIME field on success, |
1005
|
|
|
* or false if an error occurs during the decoding |
1006
|
|
|
*/ |
1007
|
|
|
public static function decode_mimeheader($str, string $encoding = 'UTF-8') |
1008
|
|
|
{ |
1009
|
|
|
if ($encoding !== 'UTF-8' && $encoding !== 'CP850') { |
1010
|
|
|
$encoding = self::normalize_encoding($encoding, 'UTF-8'); |
1011
|
|
|
} |
1012
|
|
|
|
1013
|
|
|
if (self::$SUPPORT['iconv'] === true) { |
1014
|
|
|
return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding); |
1015
|
|
|
} |
1016
|
|
|
|
1017
|
|
|
if ($encoding !== 'UTF-8') { |
1018
|
|
|
$str = self::encode($encoding, $str); |
1019
|
|
|
} |
1020
|
|
|
|
1021
|
|
|
return \mb_decode_mimeheader($str); |
1022
|
|
|
} |
1023
|
|
|
|
1024
|
|
|
/** |
1025
|
|
|
* Decodes a string which was encoded by "UTF8::emoji_encode()". |
1026
|
|
|
* |
1027
|
|
|
* @param string $str <p>The input string.</p> |
1028
|
|
|
* @param bool $useReversibleStringMapping [optional] <p> |
1029
|
|
|
* When <b>TRUE</b>, we se a reversible string mapping |
1030
|
|
|
* between "emoji_encode" and "emoji_decode".</p> |
1031
|
|
|
* |
1032
|
|
|
* @return string |
1033
|
|
|
*/ |
1034
|
9 |
|
public static function emoji_decode(string $str, bool $useReversibleStringMapping = false): string |
1035
|
|
|
{ |
1036
|
9 |
|
self::initEmojiData(); |
1037
|
|
|
|
1038
|
9 |
|
if ($useReversibleStringMapping === true) { |
1039
|
9 |
|
return (string) \str_replace( |
1040
|
9 |
|
(array) self::$EMOJI_KEYS_REVERSIBLE_CACHE, |
1041
|
9 |
|
(array) self::$EMOJI_VALUES_CACHE, |
1042
|
9 |
|
$str |
1043
|
|
|
); |
1044
|
|
|
} |
1045
|
|
|
|
1046
|
1 |
|
return (string) \str_replace( |
1047
|
1 |
|
(array) self::$EMOJI_KEYS_CACHE, |
1048
|
1 |
|
(array) self::$EMOJI_VALUES_CACHE, |
1049
|
1 |
|
$str |
1050
|
|
|
); |
1051
|
|
|
} |
1052
|
|
|
|
1053
|
|
|
/** |
1054
|
|
|
* Encode a string with emoji chars into a non-emoji string. |
1055
|
|
|
* |
1056
|
|
|
* @param string $str <p>The input string</p> |
1057
|
|
|
* @param bool $useReversibleStringMapping [optional] <p> |
1058
|
|
|
* when <b>TRUE</b>, we se a reversible string mapping |
1059
|
|
|
* between "emoji_encode" and "emoji_decode"</p> |
1060
|
|
|
* |
1061
|
|
|
* @return string |
1062
|
|
|
*/ |
1063
|
12 |
|
public static function emoji_encode(string $str, bool $useReversibleStringMapping = false): string |
1064
|
|
|
{ |
1065
|
12 |
|
self::initEmojiData(); |
1066
|
|
|
|
1067
|
12 |
|
if ($useReversibleStringMapping === true) { |
1068
|
9 |
|
return (string) \str_replace( |
1069
|
9 |
|
(array) self::$EMOJI_VALUES_CACHE, |
1070
|
9 |
|
(array) self::$EMOJI_KEYS_REVERSIBLE_CACHE, |
1071
|
9 |
|
$str |
1072
|
|
|
); |
1073
|
|
|
} |
1074
|
|
|
|
1075
|
4 |
|
return (string) \str_replace( |
1076
|
4 |
|
(array) self::$EMOJI_VALUES_CACHE, |
1077
|
4 |
|
(array) self::$EMOJI_KEYS_CACHE, |
1078
|
4 |
|
$str |
1079
|
|
|
); |
1080
|
|
|
} |
1081
|
|
|
|
1082
|
|
|
/** |
1083
|
|
|
* Encode a string with a new charset-encoding. |
1084
|
|
|
* |
1085
|
|
|
* INFO: The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding, |
1086
|
|
|
* so you can call this function also on a UTF-8 String and you don't mess the string. |
1087
|
|
|
* |
1088
|
|
|
* @param string $toEncoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p> |
1089
|
|
|
* @param string $str <p>The input string</p> |
1090
|
|
|
* @param bool $autodetectFromEncoding [optional] <p>Force the new encoding (we try to fix broken / double |
1091
|
|
|
* encoding for UTF-8)<br> otherwise we auto-detect the current |
1092
|
|
|
* string-encoding</p> |
1093
|
|
|
* @param string $fromEncoding [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br> |
1094
|
|
|
* A empty string will trigger the autodetect anyway.</p> |
1095
|
|
|
* |
1096
|
|
|
* @return string |
1097
|
|
|
* |
1098
|
|
|
* @psalm-suppress InvalidReturnStatement |
1099
|
|
|
*/ |
1100
|
28 |
|
public static function encode( |
1101
|
|
|
string $toEncoding, |
1102
|
|
|
string $str, |
1103
|
|
|
bool $autodetectFromEncoding = true, |
1104
|
|
|
string $fromEncoding = '' |
1105
|
|
|
): string { |
1106
|
28 |
|
if ($str === '' || $toEncoding === '') { |
1107
|
13 |
|
return $str; |
1108
|
|
|
} |
1109
|
|
|
|
1110
|
28 |
|
if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') { |
1111
|
7 |
|
$toEncoding = self::normalize_encoding($toEncoding, 'UTF-8'); |
1112
|
|
|
} |
1113
|
|
|
|
1114
|
28 |
|
if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') { |
1115
|
2 |
|
$fromEncoding = self::normalize_encoding($fromEncoding, null); |
1116
|
|
|
} |
1117
|
|
|
|
1118
|
|
|
if ( |
1119
|
28 |
|
$toEncoding |
1120
|
|
|
&& |
1121
|
28 |
|
$fromEncoding |
1122
|
|
|
&& |
1123
|
28 |
|
$fromEncoding === $toEncoding |
1124
|
|
|
) { |
1125
|
|
|
return $str; |
1126
|
|
|
} |
1127
|
|
|
|
1128
|
28 |
|
if ($toEncoding === 'JSON') { |
1129
|
1 |
|
$return = self::json_encode($str); |
1130
|
1 |
|
if ($return === false) { |
1131
|
|
|
throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().'); |
1132
|
|
|
} |
1133
|
|
|
|
1134
|
1 |
|
return $return; |
1135
|
|
|
} |
1136
|
28 |
|
if ($fromEncoding === 'JSON') { |
1137
|
1 |
|
$str = self::json_decode($str); |
1138
|
1 |
|
$fromEncoding = ''; |
1139
|
|
|
} |
1140
|
|
|
|
1141
|
28 |
|
if ($toEncoding === 'BASE64') { |
1142
|
2 |
|
return \base64_encode($str); |
1143
|
|
|
} |
1144
|
28 |
|
if ($fromEncoding === 'BASE64') { |
1145
|
2 |
|
$str = \base64_decode($str, true); |
1146
|
2 |
|
$fromEncoding = ''; |
1147
|
|
|
} |
1148
|
|
|
|
1149
|
28 |
|
if ($toEncoding === 'HTML-ENTITIES') { |
1150
|
2 |
|
return self::html_encode($str, true, 'UTF-8'); |
1151
|
|
|
} |
1152
|
28 |
|
if ($fromEncoding === 'HTML-ENTITIES') { |
1153
|
2 |
|
$str = self::html_decode($str, \ENT_COMPAT, 'UTF-8'); |
1154
|
2 |
|
$fromEncoding = ''; |
1155
|
|
|
} |
1156
|
|
|
|
1157
|
28 |
|
$fromEncodingDetected = false; |
1158
|
|
|
if ( |
1159
|
28 |
|
$autodetectFromEncoding === true |
1160
|
|
|
|| |
1161
|
28 |
|
!$fromEncoding |
1162
|
|
|
) { |
1163
|
28 |
|
$fromEncodingDetected = self::str_detect_encoding($str); |
1164
|
|
|
} |
1165
|
|
|
|
1166
|
|
|
// DEBUG |
1167
|
|
|
//var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n"); |
1168
|
|
|
|
1169
|
28 |
|
if ($fromEncodingDetected !== false) { |
1170
|
24 |
|
$fromEncoding = $fromEncodingDetected; |
1171
|
7 |
|
} elseif ($autodetectFromEncoding === true) { |
1172
|
|
|
// fallback for the "autodetect"-mode |
1173
|
7 |
|
return self::to_utf8($str); |
1174
|
|
|
} |
1175
|
|
|
|
1176
|
|
|
if ( |
1177
|
24 |
|
!$fromEncoding |
1178
|
|
|
|| |
1179
|
24 |
|
$fromEncoding === $toEncoding |
1180
|
|
|
) { |
1181
|
15 |
|
return $str; |
1182
|
|
|
} |
1183
|
|
|
|
1184
|
|
|
if ( |
1185
|
19 |
|
$toEncoding === 'UTF-8' |
1186
|
|
|
&& |
1187
|
|
|
( |
1188
|
17 |
|
$fromEncoding === 'WINDOWS-1252' |
1189
|
|
|
|| |
1190
|
19 |
|
$fromEncoding === 'ISO-8859-1' |
1191
|
|
|
) |
1192
|
|
|
) { |
1193
|
13 |
|
return self::to_utf8($str); |
1194
|
|
|
} |
1195
|
|
|
|
1196
|
|
|
if ( |
1197
|
12 |
|
$toEncoding === 'ISO-8859-1' |
1198
|
|
|
&& |
1199
|
|
|
( |
1200
|
6 |
|
$fromEncoding === 'WINDOWS-1252' |
1201
|
|
|
|| |
1202
|
12 |
|
$fromEncoding === 'UTF-8' |
1203
|
|
|
) |
1204
|
|
|
) { |
1205
|
6 |
|
return self::to_iso8859($str); |
1206
|
|
|
} |
1207
|
|
|
|
1208
|
|
|
if ( |
1209
|
10 |
|
$toEncoding !== 'UTF-8' |
1210
|
|
|
&& |
1211
|
10 |
|
$toEncoding !== 'ISO-8859-1' |
1212
|
|
|
&& |
1213
|
10 |
|
$toEncoding !== 'WINDOWS-1252' |
1214
|
|
|
&& |
1215
|
10 |
|
self::$SUPPORT['mbstring'] === false |
1216
|
|
|
) { |
1217
|
|
|
\trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING); |
1218
|
|
|
} |
1219
|
|
|
|
1220
|
10 |
|
if (self::$SUPPORT['mbstring'] === true) { |
1221
|
|
|
// warning: do not use the symfony polyfill here |
1222
|
10 |
|
$strEncoded = \mb_convert_encoding( |
1223
|
10 |
|
$str, |
1224
|
10 |
|
$toEncoding, |
1225
|
10 |
|
$fromEncoding |
1226
|
|
|
); |
1227
|
|
|
|
1228
|
10 |
|
if ($strEncoded) { |
1229
|
10 |
|
return $strEncoded; |
1230
|
|
|
} |
1231
|
|
|
} |
1232
|
|
|
|
1233
|
|
|
$return = \iconv($fromEncoding, $toEncoding, $str); |
1234
|
|
|
if ($return !== false) { |
1235
|
|
|
return $return; |
1236
|
|
|
} |
1237
|
|
|
|
1238
|
|
|
return $str; |
1239
|
|
|
} |
1240
|
|
|
|
1241
|
|
|
/** |
1242
|
|
|
* @param string $str |
1243
|
|
|
* @param string $fromCharset [optional] <p>Set the input charset.</p> |
1244
|
|
|
* @param string $toCharset [optional] <p>Set the output charset.</p> |
1245
|
|
|
* @param string $transferEncoding [optional] <p>Set the transfer encoding.</p> |
1246
|
|
|
* @param string $linefeed [optional] <p>Set the used linefeed.</p> |
1247
|
|
|
* @param int $indent [optional] <p>Set the max length indent.</p> |
1248
|
|
|
* |
1249
|
|
|
* @return false|string |
1250
|
|
|
* An encoded MIME field on success, |
1251
|
|
|
* or false if an error occurs during the encoding |
1252
|
|
|
*/ |
1253
|
|
|
public static function encode_mimeheader( |
1254
|
|
|
$str, |
1255
|
|
|
$fromCharset = 'UTF-8', |
1256
|
|
|
$toCharset = 'UTF-8', |
1257
|
|
|
$transferEncoding = 'Q', |
1258
|
|
|
$linefeed = '\\r\\n', |
1259
|
|
|
$indent = 76 |
1260
|
|
|
) { |
1261
|
|
|
if ($fromCharset !== 'UTF-8' && $fromCharset !== 'CP850') { |
1262
|
|
|
$fromCharset = self::normalize_encoding($fromCharset, 'UTF-8'); |
1263
|
|
|
} |
1264
|
|
|
|
1265
|
|
|
if ($toCharset !== 'UTF-8' && $toCharset !== 'CP850') { |
1266
|
|
|
$toCharset = self::normalize_encoding($toCharset, 'UTF-8'); |
1267
|
|
|
} |
1268
|
|
|
|
1269
|
|
|
return \iconv_mime_encode( |
1270
|
|
|
'', |
1271
|
|
|
$str, |
1272
|
|
|
[ |
1273
|
|
|
'scheme' => $transferEncoding, |
1274
|
|
|
'line-length' => $indent, |
1275
|
|
|
'input-charset' => $fromCharset, |
1276
|
|
|
'output-charset' => $toCharset, |
1277
|
|
|
'line-break-chars' => $linefeed, |
1278
|
|
|
] |
1279
|
|
|
); |
1280
|
|
|
} |
1281
|
|
|
|
1282
|
|
|
/** |
1283
|
|
|
* Create an extract from a sentence, so if the search-string was found, it try to centered in the output. |
1284
|
|
|
* |
1285
|
|
|
* @param string $str <p>The input string.</p> |
1286
|
|
|
* @param string $search <p>The searched string.</p> |
1287
|
|
|
* @param int|null $length [optional] <p>Default: null === text->length / 2</p> |
1288
|
|
|
* @param string $replacerForSkippedText [optional] <p>Default: …</p> |
1289
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
1290
|
|
|
* |
1291
|
|
|
* @return string |
1292
|
|
|
*/ |
1293
|
1 |
|
public static function extract_text( |
1294
|
|
|
string $str, |
1295
|
|
|
string $search = '', |
1296
|
|
|
int $length = null, |
1297
|
|
|
string $replacerForSkippedText = '…', |
1298
|
|
|
string $encoding = 'UTF-8' |
1299
|
|
|
): string { |
1300
|
1 |
|
if ($str === '') { |
1301
|
1 |
|
return ''; |
1302
|
|
|
} |
1303
|
|
|
|
1304
|
1 |
|
if ($encoding !== 'UTF-8' && $encoding !== 'CP850') { |
1305
|
|
|
$encoding = self::normalize_encoding($encoding, 'UTF-8'); |
1306
|
|
|
} |
1307
|
|
|
|
1308
|
1 |
|
$trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&"; |
1309
|
|
|
|
1310
|
1 |
|
if ($length === null) { |
1311
|
1 |
|
$length = (int) \round((int) self::strlen($str, $encoding) / 2, 0); |
1312
|
|
|
} |
1313
|
|
|
|
1314
|
1 |
|
if ($search === '') { |
1315
|
1 |
|
if ($encoding === 'UTF-8') { |
1316
|
1 |
|
if ($length > 0) { |
1317
|
1 |
|
$stringLength = (int) \mb_strlen($str); |
1318
|
1 |
|
$end = ($length - 1) > $stringLength ? $stringLength : ($length - 1); |
1319
|
|
|
} else { |
1320
|
1 |
|
$end = 0; |
1321
|
|
|
} |
1322
|
|
|
|
1323
|
1 |
|
$pos = (int) \min( |
1324
|
1 |
|
\mb_strpos($str, ' ', $end), |
1325
|
1 |
|
\mb_strpos($str, '.', $end) |
1326
|
|
|
); |
1327
|
|
|
} else { |
1328
|
|
|
if ($length > 0) { |
1329
|
|
|
$stringLength = (int) self::strlen($str, $encoding); |
1330
|
|
|
$end = ($length - 1) > $stringLength ? $stringLength : ($length - 1); |
1331
|
|
|
} else { |
1332
|
|
|
$end = 0; |
1333
|
|
|
} |
1334
|
|
|
|
1335
|
|
|
$pos = (int) \min( |
1336
|
|
|
self::strpos($str, ' ', $end, $encoding), |
1337
|
|
|
self::strpos($str, '.', $end, $encoding) |
1338
|
|
|
); |
1339
|
|
|
} |
1340
|
|
|
|
1341
|
1 |
|
if ($pos) { |
1342
|
1 |
|
if ($encoding === 'UTF-8') { |
1343
|
1 |
|
$strSub = \mb_substr($str, 0, $pos); |
1344
|
|
|
} else { |
1345
|
|
|
$strSub = self::substr($str, 0, $pos, $encoding); |
1346
|
|
|
} |
1347
|
|
|
|
1348
|
1 |
|
if ($strSub === false) { |
1349
|
|
|
return ''; |
1350
|
|
|
} |
1351
|
|
|
|
1352
|
1 |
|
return \rtrim($strSub, $trimChars) . $replacerForSkippedText; |
1353
|
|
|
} |
1354
|
|
|
|
1355
|
|
|
return $str; |
1356
|
|
|
} |
1357
|
|
|
|
1358
|
1 |
|
if ($encoding === 'UTF-8') { |
1359
|
1 |
|
$wordPos = (int) \mb_stripos($str, $search); |
1360
|
1 |
|
$halfSide = (int) ($wordPos - $length / 2 + (int) \mb_strlen($search) / 2); |
1361
|
|
|
} else { |
1362
|
|
|
$wordPos = (int) self::stripos($str, $search, 0, $encoding); |
1363
|
|
|
$halfSide = (int) ($wordPos - $length / 2 + (int) self::strlen($search, $encoding) / 2); |
1364
|
|
|
} |
1365
|
|
|
|
1366
|
1 |
|
$pos_start = 0; |
1367
|
1 |
|
if ($halfSide > 0) { |
1368
|
1 |
|
if ($encoding === 'UTF-8') { |
1369
|
1 |
|
$halfText = \mb_substr($str, 0, $halfSide); |
1370
|
|
|
} else { |
1371
|
|
|
$halfText = self::substr($str, 0, $halfSide, $encoding); |
1372
|
|
|
} |
1373
|
1 |
|
if ($halfText !== false) { |
1374
|
1 |
|
if ($encoding === 'UTF-8') { |
1375
|
1 |
|
$pos_start = (int) \max( |
1376
|
1 |
|
\mb_strrpos($halfText, ' '), |
1377
|
1 |
|
\mb_strrpos($halfText, '.') |
1378
|
|
|
); |
1379
|
|
|
} else { |
1380
|
|
|
$pos_start = (int) \max( |
1381
|
|
|
self::strrpos($halfText, ' ', 0, $encoding), |
1382
|
|
|
self::strrpos($halfText, '.', 0, $encoding) |
1383
|
|
|
); |
1384
|
|
|
} |
1385
|
|
|
} |
1386
|
|
|
} |
1387
|
|
|
|
1388
|
1 |
|
if ($wordPos && $halfSide > 0) { |
1389
|
1 |
|
$offset = $pos_start + $length - 1; |
1390
|
1 |
|
$realLength = (int) self::strlen($str, $encoding); |
1391
|
|
|
|
1392
|
1 |
|
if ($offset > $realLength) { |
1393
|
|
|
$offset = $realLength; |
1394
|
|
|
} |
1395
|
|
|
|
1396
|
1 |
|
if ($encoding === 'UTF-8') { |
1397
|
1 |
|
$pos_end = (int) \min( |
1398
|
1 |
|
\mb_strpos($str, ' ', $offset), |
1399
|
1 |
|
\mb_strpos($str, '.', $offset) |
1400
|
1 |
|
) - $pos_start; |
1401
|
|
|
} else { |
1402
|
|
|
$pos_end = (int) \min( |
1403
|
|
|
self::strpos($str, ' ', $offset, $encoding), |
1404
|
|
|
self::strpos($str, '.', $offset, $encoding) |
1405
|
|
|
) - $pos_start; |
1406
|
|
|
} |
1407
|
|
|
|
1408
|
1 |
|
if (!$pos_end || $pos_end <= 0) { |
1409
|
1 |
|
if ($encoding === 'UTF-8') { |
1410
|
1 |
|
$strSub = \mb_substr($str, $pos_start, (int) \mb_strlen($str)); |
1411
|
|
|
} else { |
1412
|
|
|
$strSub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding); |
1413
|
|
|
} |
1414
|
1 |
|
if ($strSub !== false) { |
1415
|
1 |
|
$extract = $replacerForSkippedText . \ltrim($strSub, $trimChars); |
1416
|
|
|
} else { |
1417
|
1 |
|
$extract = ''; |
1418
|
|
|
} |
1419
|
|
|
} else { |
1420
|
1 |
|
if ($encoding === 'UTF-8') { |
1421
|
1 |
|
$strSub = \mb_substr($str, $pos_start, $pos_end); |
1422
|
|
|
} else { |
1423
|
|
|
$strSub = self::substr($str, $pos_start, $pos_end, $encoding); |
1424
|
|
|
} |
1425
|
1 |
|
if ($strSub !== false) { |
1426
|
1 |
|
$extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText; |
1427
|
|
|
} else { |
1428
|
1 |
|
$extract = ''; |
1429
|
|
|
} |
1430
|
|
|
} |
1431
|
|
|
} else { |
1432
|
1 |
|
$offset = $length - 1; |
1433
|
1 |
|
$trueLength = (int) self::strlen($str, $encoding); |
1434
|
|
|
|
1435
|
1 |
|
if ($offset > $trueLength) { |
1436
|
|
|
$offset = $trueLength; |
1437
|
|
|
} |
1438
|
|
|
|
1439
|
1 |
|
if ($encoding === 'UTF-8') { |
1440
|
1 |
|
$pos_end = (int) \min( |
1441
|
1 |
|
\mb_strpos($str, ' ', $offset), |
1442
|
1 |
|
\mb_strpos($str, '.', $offset) |
1443
|
|
|
); |
1444
|
|
|
} else { |
1445
|
|
|
$pos_end = (int) \min( |
1446
|
|
|
self::strpos($str, ' ', $offset, $encoding), |
1447
|
|
|
self::strpos($str, '.', $offset, $encoding) |
1448
|
|
|
); |
1449
|
|
|
} |
1450
|
|
|
|
1451
|
1 |
|
if ($pos_end) { |
1452
|
1 |
|
if ($encoding === 'UTF-8') { |
1453
|
1 |
|
$strSub = \mb_substr($str, 0, $pos_end); |
1454
|
|
|
} else { |
1455
|
|
|
$strSub = self::substr($str, 0, $pos_end, $encoding); |
1456
|
|
|
} |
1457
|
1 |
|
if ($strSub !== false) { |
1458
|
1 |
|
$extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText; |
1459
|
|
|
} else { |
1460
|
1 |
|
$extract = ''; |
1461
|
|
|
} |
1462
|
|
|
} else { |
1463
|
1 |
|
$extract = $str; |
1464
|
|
|
} |
1465
|
|
|
} |
1466
|
|
|
|
1467
|
1 |
|
return $extract; |
1468
|
|
|
} |
1469
|
|
|
|
1470
|
|
|
/** |
1471
|
|
|
* Reads entire file into a string. |
1472
|
|
|
* |
1473
|
|
|
* WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!! |
1474
|
|
|
* |
1475
|
|
|
* @see http://php.net/manual/en/function.file-get-contents.php |
1476
|
|
|
* |
1477
|
|
|
* @param string $filename <p> |
1478
|
|
|
* Name of the file to read. |
1479
|
|
|
* </p> |
1480
|
|
|
* @param bool $use_include_path [optional] <p> |
1481
|
|
|
* Prior to PHP 5, this parameter is called |
1482
|
|
|
* use_include_path and is a bool. |
1483
|
|
|
* As of PHP 5 the FILE_USE_INCLUDE_PATH can be used |
1484
|
|
|
* to trigger include path |
1485
|
|
|
* search. |
1486
|
|
|
* </p> |
1487
|
|
|
* @param resource|null $context [optional] <p> |
1488
|
|
|
* A valid context resource created with |
1489
|
|
|
* stream_context_create. If you don't need to use a |
1490
|
|
|
* custom context, you can skip this parameter by &null;. |
1491
|
|
|
* </p> |
1492
|
|
|
* @param int|null $offset [optional] <p> |
1493
|
|
|
* The offset where the reading starts. |
1494
|
|
|
* </p> |
1495
|
|
|
* @param int|null $maxLength [optional] <p> |
1496
|
|
|
* Maximum length of data read. The default is to read until end |
1497
|
|
|
* of file is reached. |
1498
|
|
|
* </p> |
1499
|
|
|
* @param int $timeout <p>The time in seconds for the timeout.</p> |
1500
|
|
|
* @param bool $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for |
1501
|
|
|
* some files, because they used non default utf-8 chars. Binary files |
1502
|
|
|
* like images or pdf will not be converted.</p> |
1503
|
|
|
* @param string $fromEncoding [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br> |
1504
|
|
|
* A empty string will trigger the autodetect anyway.</p> |
1505
|
|
|
* |
1506
|
|
|
* @return false|string the function returns the read data as string or <b>false</b> on failure |
1507
|
|
|
*/ |
1508
|
12 |
|
public static function file_get_contents( |
1509
|
|
|
string $filename, |
1510
|
|
|
bool $use_include_path = false, |
1511
|
|
|
$context = null, |
1512
|
|
|
int $offset = null, |
1513
|
|
|
int $maxLength = null, |
1514
|
|
|
int $timeout = 10, |
1515
|
|
|
bool $convertToUtf8 = true, |
1516
|
|
|
string $fromEncoding = '' |
1517
|
|
|
) { |
1518
|
|
|
// init |
1519
|
12 |
|
$filename = \filter_var($filename, \FILTER_SANITIZE_STRING); |
1520
|
12 |
|
if ($filename === false) { |
1521
|
|
|
return false; |
1522
|
|
|
} |
1523
|
|
|
|
1524
|
12 |
|
if ($timeout && $context === null) { |
1525
|
9 |
|
$context = \stream_context_create( |
1526
|
|
|
[ |
1527
|
|
|
'http' => [ |
1528
|
9 |
|
'timeout' => $timeout, |
1529
|
|
|
], |
1530
|
|
|
] |
1531
|
|
|
); |
1532
|
|
|
} |
1533
|
|
|
|
1534
|
12 |
|
if ($offset === null) { |
1535
|
12 |
|
$offset = 0; |
1536
|
|
|
} |
1537
|
|
|
|
1538
|
12 |
|
if (\is_int($maxLength) === true) { |
1539
|
2 |
|
$data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength); |
1540
|
|
|
} else { |
1541
|
12 |
|
$data = \file_get_contents($filename, $use_include_path, $context, $offset); |
1542
|
|
|
} |
1543
|
|
|
|
1544
|
|
|
// return false on error |
1545
|
12 |
|
if ($data === false) { |
1546
|
|
|
return false; |
1547
|
|
|
} |
1548
|
|
|
|
1549
|
12 |
|
if ($convertToUtf8 === true) { |
1550
|
|
|
if ( |
1551
|
12 |
|
self::is_binary($data, true) !== true |
1552
|
|
|
|| |
1553
|
9 |
|
self::is_utf16($data, false) !== false |
1554
|
|
|
|| |
1555
|
12 |
|
self::is_utf32($data, false) !== false |
1556
|
|
|
) { |
1557
|
9 |
|
$data = self::encode('UTF-8', $data, false, $fromEncoding); |
1558
|
9 |
|
$data = self::cleanup($data); |
1559
|
|
|
} |
1560
|
|
|
} |
1561
|
|
|
|
1562
|
12 |
|
return $data; |
1563
|
|
|
} |
1564
|
|
|
|
1565
|
|
|
/** |
1566
|
|
|
* Checks if a file starts with BOM (Byte Order Mark) character. |
1567
|
|
|
* |
1568
|
|
|
* @param string $file_path <p>Path to a valid file.</p> |
1569
|
|
|
* |
1570
|
|
|
* @throws \RuntimeException if file_get_contents() returned false |
1571
|
|
|
* |
1572
|
|
|
* @return bool |
1573
|
|
|
* <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise |
1574
|
|
|
*/ |
1575
|
2 |
|
public static function file_has_bom(string $file_path): bool |
1576
|
|
|
{ |
1577
|
2 |
|
$file_content = \file_get_contents($file_path); |
1578
|
2 |
|
if ($file_content === false) { |
1579
|
|
|
throw new \RuntimeException('file_get_contents() returned false for:' . $file_path); |
1580
|
|
|
} |
1581
|
|
|
|
1582
|
2 |
|
return self::string_has_bom($file_content); |
1583
|
|
|
} |
1584
|
|
|
|
1585
|
|
|
/** |
1586
|
|
|
* Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
1587
|
|
|
* |
1588
|
|
|
* @param mixed $var |
1589
|
|
|
* @param int $normalization_form |
1590
|
|
|
* @param string $leading_combining |
1591
|
|
|
* |
1592
|
|
|
* @return mixed |
1593
|
|
|
*/ |
1594
|
62 |
|
public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌') |
1595
|
|
|
{ |
1596
|
62 |
|
switch (\gettype($var)) { |
1597
|
62 |
|
case 'array': |
1598
|
|
|
/** @noinspection ForeachSourceInspection */ |
1599
|
6 |
|
foreach ($var as $k => &$v) { |
1600
|
6 |
|
$v = self::filter($v, $normalization_form, $leading_combining); |
1601
|
|
|
} |
1602
|
6 |
|
unset($v); |
1603
|
|
|
|
1604
|
6 |
|
break; |
1605
|
62 |
|
case 'object': |
1606
|
|
|
/** @noinspection ForeachSourceInspection */ |
1607
|
4 |
|
foreach ($var as $k => &$v) { |
1608
|
4 |
|
$v = self::filter($v, $normalization_form, $leading_combining); |
1609
|
|
|
} |
1610
|
4 |
|
unset($v); |
1611
|
|
|
|
1612
|
4 |
|
break; |
1613
|
62 |
|
case 'string': |
1614
|
|
|
|
1615
|
62 |
|
if (\strpos($var, "\r") !== false) { |
1616
|
|
|
// Workaround https://bugs.php.net/65732 |
1617
|
3 |
|
$var = self::normalize_line_ending($var); |
1618
|
|
|
} |
1619
|
|
|
|
1620
|
62 |
|
if (self::is_ascii($var) === false) { |
1621
|
32 |
|
if (\Normalizer::isNormalized($var, $normalization_form)) { |
1622
|
27 |
|
$n = '-'; |
1623
|
|
|
} else { |
1624
|
12 |
|
$n = \Normalizer::normalize($var, $normalization_form); |
1625
|
|
|
|
1626
|
12 |
|
if (isset($n[0])) { |
1627
|
7 |
|
$var = $n; |
1628
|
|
|
} else { |
1629
|
8 |
|
$var = self::encode('UTF-8', $var, true); |
1630
|
|
|
} |
1631
|
|
|
} |
1632
|
|
|
|
1633
|
|
|
if ( |
1634
|
32 |
|
$var[0] >= "\x80" |
1635
|
|
|
&& |
1636
|
32 |
|
isset($n[0], $leading_combining[0]) |
1637
|
|
|
&& |
1638
|
32 |
|
\preg_match('/^\\p{Mn}/u', $var) |
1639
|
|
|
) { |
1640
|
|
|
// Prevent leading combining chars |
1641
|
|
|
// for NFC-safe concatenations. |
1642
|
3 |
|
$var = $leading_combining . $var; |
1643
|
|
|
} |
1644
|
|
|
} |
1645
|
|
|
|
1646
|
62 |
|
break; |
1647
|
|
|
} |
1648
|
|
|
|
1649
|
62 |
|
return $var; |
1650
|
|
|
} |
1651
|
|
|
|
1652
|
|
|
/** |
1653
|
|
|
* "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
1654
|
|
|
* |
1655
|
|
|
* Gets a specific external variable by name and optionally filters it |
1656
|
|
|
* |
1657
|
|
|
* @see http://php.net/manual/en/function.filter-input.php |
1658
|
|
|
* |
1659
|
|
|
* @param int $type <p> |
1660
|
|
|
* One of <b>INPUT_GET</b>, <b>INPUT_POST</b>, |
1661
|
|
|
* <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or |
1662
|
|
|
* <b>INPUT_ENV</b>. |
1663
|
|
|
* </p> |
1664
|
|
|
* @param string $variable_name <p> |
1665
|
|
|
* Name of a variable to get. |
1666
|
|
|
* </p> |
1667
|
|
|
* @param int $filter [optional] <p> |
1668
|
|
|
* The ID of the filter to apply. The |
1669
|
|
|
* manual page lists the available filters. |
1670
|
|
|
* </p> |
1671
|
|
|
* @param mixed $options [optional] <p> |
1672
|
|
|
* Associative array of options or bitwise disjunction of flags. If filter |
1673
|
|
|
* accepts options, flags can be provided in "flags" field of array. |
1674
|
|
|
* </p> |
1675
|
|
|
* |
1676
|
|
|
* @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the |
1677
|
|
|
* <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it |
1678
|
|
|
* returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails. |
1679
|
|
|
*/ |
1680
|
|
|
public static function filter_input( |
1681
|
|
|
int $type, |
1682
|
|
|
string $variable_name, |
1683
|
|
|
int $filter = \FILTER_DEFAULT, |
1684
|
|
|
$options = null |
1685
|
|
|
) { |
1686
|
|
|
if (\func_num_args() < 4) { |
1687
|
|
|
$var = \filter_input($type, $variable_name, $filter); |
1688
|
|
|
} else { |
1689
|
|
|
$var = \filter_input($type, $variable_name, $filter, $options); |
1690
|
|
|
} |
1691
|
|
|
|
1692
|
|
|
return self::filter($var); |
1693
|
|
|
} |
1694
|
|
|
|
1695
|
|
|
/** |
1696
|
|
|
* "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed. |
1697
|
|
|
* |
1698
|
|
|
* Gets external variables and optionally filters them |
1699
|
|
|
* |
1700
|
|
|
* @see http://php.net/manual/en/function.filter-input-array.php |
1701
|
|
|
* |
1702
|
|
|
* @param int $type <p> |
1703
|
|
|
* One of <b>INPUT_GET</b>, <b>INPUT_POST</b>, |
1704
|
|
|
* <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or |
1705
|
|
|
* <b>INPUT_ENV</b>. |
1706
|
|
|
* </p> |
1707
|
|
|
* @param mixed $definition [optional] <p> |
1708
|
|
|
* An array defining the arguments. A valid key is a string |
1709
|
|
|
* containing a variable name and a valid value is either a filter type, or an array |
1710
|
|
|
* optionally specifying the filter, flags and options. If the value is an |
1711
|
|
|
* array, valid keys are filter which specifies the |
1712
|
|
|
* filter type, |
1713
|
|
|
* flags which specifies any flags that apply to the |
1714
|
|
|
* filter, and options which specifies any options that |
1715
|
|
|
* apply to the filter. See the example below for a better understanding. |
1716
|
|
|
* </p> |
1717
|
|
|
* <p> |
1718
|
|
|
* This parameter can be also an integer holding a filter constant. Then all values in the |
1719
|
|
|
* input array are filtered by this filter. |
1720
|
|
|
* </p> |
1721
|
|
|
* @param bool $add_empty [optional] <p> |
1722
|
|
|
* Add missing keys as <b>NULL</b> to the return value. |
1723
|
|
|
* </p> |
1724
|
|
|
* |
1725
|
|
|
* @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure. |
1726
|
|
|
* An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not |
1727
|
|
|
* set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable |
1728
|
|
|
* is not set and <b>NULL</b> if the filter fails. |
1729
|
|
|
*/ |
1730
|
|
|
public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
{
    // Forward only the arguments the caller really supplied, so the native
    // function falls back to its own defaults when $definition was omitted.
    $rawInput = \func_num_args() >= 2
        ? \filter_input_array($type, $definition, $add_empty)
        : \filter_input_array($type);

    // Hand the native result to self::filter() before returning it.
    return self::filter($rawInput);
}
1740
|
|
|
|
1741
|
|
|
/** |
1742
|
|
|
* "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1743
|
|
|
* |
1744
|
|
|
* Filters a variable with a specified filter |
1745
|
|
|
* |
1746
|
|
|
* @see http://php.net/manual/en/function.filter-var.php |
1747
|
|
|
* |
1748
|
|
|
* @param mixed $variable <p> |
1749
|
|
|
* Value to filter. |
1750
|
|
|
* </p> |
1751
|
|
|
* @param int $filter [optional] <p> |
1752
|
|
|
* The ID of the filter to apply. The |
1753
|
|
|
* manual page lists the available filters. |
1754
|
|
|
* </p> |
1755
|
|
|
* @param mixed $options [optional] <p> |
1756
|
|
|
* Associative array of options or bitwise disjunction of flags. If filter |
1757
|
|
|
* accepts options, flags can be provided in "flags" field of array. For |
1758
|
|
|
* the "callback" filter, callable type should be passed. The |
1759
|
|
|
* callback must accept one argument, the value to be filtered, and return |
1760
|
|
|
* the value after filtering/sanitizing it. |
1761
|
|
|
* </p> |
1762
|
|
|
* <p> |
1763
|
|
|
* <code> |
1764
|
|
|
* // for filters that accept options, use this format |
1765
|
|
|
* $options = array( |
1766
|
|
|
* 'options' => array( |
1767
|
|
|
* 'default' => 3, // value to return if the filter fails |
1768
|
|
|
* // other options here |
1769
|
|
|
* 'min_range' => 0 |
1770
|
|
|
* ), |
1771
|
|
|
* 'flags' => FILTER_FLAG_ALLOW_OCTAL, |
1772
|
|
|
* ); |
1773
|
|
|
* $var = filter_var('0755', FILTER_VALIDATE_INT, $options); |
1774
|
|
|
* // for filter that only accept flags, you can pass them directly |
1775
|
|
|
* $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE); |
1776
|
|
|
* // for filter that only accept flags, you can also pass as an array |
1777
|
|
|
* $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, |
1778
|
|
|
* array('flags' => FILTER_NULL_ON_FAILURE)); |
1779
|
|
|
* // callback validate filter |
1780
|
|
|
* function foo($value) |
1781
|
|
|
* { |
1782
|
|
|
* // Expected format: Surname, GivenNames |
1783
|
|
|
* if (strpos($value, ", ") === false) return false; |
1784
|
|
|
* list($surname, $givennames) = explode(", ", $value, 2); |
1785
|
|
|
* $empty = (empty($surname) || empty($givennames)); |
1786
|
|
|
* $notstrings = (!is_string($surname) || !is_string($givennames)); |
1787
|
|
|
* if ($empty || $notstrings) { |
1788
|
|
|
* return false; |
1789
|
|
|
* } else { |
1790
|
|
|
* return $value; |
1791
|
|
|
* } |
1792
|
|
|
* } |
1793
|
|
|
* $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo')); |
1794
|
|
|
* </code> |
1795
|
|
|
* </p> |
1796
|
|
|
* |
1797
|
|
|
* @return mixed the filtered data, or <b>FALSE</b> if the filter fails |
1798
|
|
|
*/ |
1799
|
2 |
|
public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
{
    // Only pass $options on to the native function when the caller gave it;
    // otherwise let \filter_var() use its own default for that argument.
    $filtered = \func_num_args() >= 3
        ? \filter_var($variable, $filter, $options)
        : \filter_var($variable, $filter);

    // Hand the native result to self::filter() before returning it.
    return self::filter($filtered);
}
1809
|
|
|
|
1810
|
|
|
/** |
1811
|
|
|
* "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1812
|
|
|
* |
1813
|
|
|
* Gets multiple variables and optionally filters them |
1814
|
|
|
* |
1815
|
|
|
* @see http://php.net/manual/en/function.filter-var-array.php |
1816
|
|
|
* |
1817
|
|
|
* @param array $data <p> |
1818
|
|
|
* An array with string keys containing the data to filter. |
1819
|
|
|
* </p> |
1820
|
|
|
* @param mixed $definition [optional] <p> |
1821
|
|
|
* An array defining the arguments. A valid key is a string |
1822
|
|
|
* containing a variable name and a valid value is either a |
1823
|
|
|
* filter type, or an |
1824
|
|
|
* array optionally specifying the filter, flags and options. |
1825
|
|
|
* If the value is an array, valid keys are filter |
1826
|
|
|
* which specifies the filter type, |
1827
|
|
|
* flags which specifies any flags that apply to the |
1828
|
|
|
* filter, and options which specifies any options that |
1829
|
|
|
* apply to the filter. See the example below for a better understanding. |
1830
|
|
|
* </p> |
1831
|
|
|
* <p> |
1832
|
|
|
* This parameter can be also an integer holding a filter constant. Then all values in the |
1833
|
|
|
* input array are filtered by this filter. |
1834
|
|
|
* </p> |
1835
|
|
|
* @param bool $add_empty [optional] <p> |
1836
|
|
|
* Add missing keys as <b>NULL</b> to the return value. |
1837
|
|
|
* </p> |
1838
|
|
|
* |
1839
|
|
|
* @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure. |
1840
|
|
|
* An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not |
1841
|
|
|
* set |
1842
|
|
|
*/ |
1843
|
2 |
|
public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
{
    // Forward only the arguments the caller really supplied, so the native
    // function falls back to its own defaults when $definition was omitted.
    $filtered = \func_num_args() >= 2
        ? \filter_var_array($data, $definition, $add_empty)
        : \filter_var_array($data);

    // Hand the native result to self::filter() before returning it.
    return self::filter($filtered);
}
1853
|
|
|
|
1854
|
|
|
/** |
1855
|
|
|
* Checks whether finfo is available on the server. |
1856
|
|
|
* |
1857
|
|
|
* @return bool |
1858
|
|
|
* <strong>true</strong> if available, <strong>false</strong> otherwise |
1859
|
|
|
*/ |
1860
|
|
|
public static function finfo_loaded(): bool
{
    // The fileinfo extension is considered available when its "finfo" class exists.
    $hasFinfoClass = \class_exists('finfo');

    return $hasFinfoClass;
}
1864
|
|
|
|
1865
|
|
|
/** |
1866
|
|
|
* Returns the first $n characters of the string. |
1867
|
|
|
* |
1868
|
|
|
* @param string $str <p>The input string.</p> |
1869
|
|
|
* @param int $n <p>Number of characters to retrieve from the start.</p> |
1870
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
1871
|
|
|
* |
1872
|
|
|
* @return string |
1873
|
|
|
*/ |
1874
|
13 |
|
public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
{
    // Nothing to cut from an empty string or for a non-positive length.
    if ($n <= 0 || $str === '') {
        return '';
    }

    // UTF-8 goes straight to mbstring; any other charset is delegated to self::substr().
    $head = $encoding === 'UTF-8'
        ? \mb_substr($str, 0, $n)
        : self::substr($str, 0, $n, $encoding);

    return (string) $head;
}
1886
|
|
|
|
1887
|
|
|
/** |
1888
|
|
|
* Check if the number of unicode characters are not more than the specified integer. |
1889
|
|
|
* |
1890
|
|
|
* @param string $str the original string to be checked |
1891
|
|
|
* @param int $box_size the size in number of chars to be checked against string |
1892
|
|
|
* |
1893
|
|
|
* @return bool true if string is less than or equal to $box_size, false otherwise |
1894
|
|
|
*/ |
1895
|
2 |
|
public static function fits_inside(string $str, int $box_size): bool
{
    // Character count as reported by self::strlen().
    $charCount = self::strlen($str);

    // The string fits when the box is at least as large as the character count.
    return $box_size >= $charCount;
}
1899
|
|
|
|
1900
|
|
|
/** |
1901
|
|
|
* Try to fix simple broken UTF-8 strings. |
1902
|
|
|
* |
1903
|
|
|
* INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings. |
1904
|
|
|
* |
1905
|
|
|
* If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1 |
1906
|
|
|
* (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it. |
1907
|
|
|
* See: http://en.wikipedia.org/wiki/Windows-1252 |
1908
|
|
|
* |
1909
|
|
|
* @param string $str <p>The input string</p> |
1910
|
|
|
* |
1911
|
|
|
* @return string |
1912
|
|
|
*/ |
1913
|
46 |
|
public static function fix_simple_utf8(string $str): string
{
    if ($str === '') {
        return '';
    }

    // Search/replace lists are derived once per request and then reused.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {
        // Lazily load the "broken => fixed" table the first time it is needed.
        $fixTable = self::$BROKEN_UTF8_FIX;
        if ($fixTable === null) {
            $fixTable = self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
        }

        $brokenSequences = \array_keys($fixTable);
        $fixedSequences = \array_values($fixTable);
    }

    return \str_replace($brokenSequences, $fixedSequences, $str);
}
1933
|
|
|
|
1934
|
|
|
/** |
1935
|
|
|
* Fix a double (or multiple) encoded UTF8 string. |
1936
|
|
|
* |
1937
|
|
|
* @param string|string[] $str you can use a string or an array of strings |
1938
|
|
|
* |
1939
|
|
|
* @return string|string[] |
1940
|
|
|
* Will return the fixed input-"array" or |
1941
|
|
|
* the fixed input-"string" |
1942
|
|
|
* |
1943
|
|
|
* @psalm-suppress InvalidReturnType |
1944
|
|
|
*/ |
1945
|
2 |
|
public static function fix_utf8($str)
{
    // Arrays are fixed element by element (recursively); keys are kept as they are.
    if (\is_array($str) === true) {
        foreach ($str as $key => $element) {
            $str[$key] = self::fix_utf8($element);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }

    $current = (string) $str;
    $previous = '';

    // Keep decoding and re-encoding until a pass no longer changes the string,
    // i.e. every extra layer of encoding has been peeled off.
    while ($previous !== $current) {
        $previous = $current;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $current = self::to_utf8(
            self::utf8_decode($current, true)
        );
    }

    /**
     * @psalm-suppress InvalidReturnStatement
     */
    return $current;
}
1976
|
|
|
|
1977
|
|
|
/** |
1978
|
|
|
* Get the text direction ('RTL' or 'LTR') of a specific character.
1979
|
|
|
* |
1980
|
|
|
* @param string $char |
1981
|
|
|
* |
1982
|
|
|
* @return string 'RTL' or 'LTR' |
1983
|
|
|
*/ |
1984
|
2 |
|
public static function getCharDirection(string $char): string
{
    if (self::$SUPPORT['intlChar'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $intlDirection = \IntlChar::charDirection($char);

        // Numeric direction classes, taken from the "IntlChar"-Class.
        if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
            return 'LTR';
        }

        if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
            return 'RTL';
        }

        // Any other direction class falls through to the code point tables below.
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of 0x5be..0x10b7f is treated as left-to-right.
    if ($c < 0x5be || $c > 0x10b7f) {
        return 'LTR';
    }

    // True when the code point is one of the single values (strict match)
    // or lies inside one of the inclusive [from, to] ranges.
    $isListed = static function ($codePoint, array $singles, array $ranges): bool {
        if (\in_array($codePoint, $singles, true)) {
            return true;
        }

        foreach ($ranges as $range) {
            if ($codePoint >= $range[0] && $codePoint <= $range[1]) {
                return true;
            }
        }

        return false;
    };

    if ($c <= 0x85e) {
        // Right-to-left code points up to 0x85e.
        $isRtl = $isListed(
            $c,
            [
                0x5be, 0x5c0, 0x5c3, 0x5c6,
                0x608, 0x60b, 0x60d, 0x61b,
                0x710, 0x7b1, 0x7fa,
                0x81a, 0x824, 0x828, 0x85e,
            ],
            [
                [0x5d0, 0x5ea],
                [0x5f0, 0x5f4],
                [0x61e, 0x64a],
                [0x66d, 0x66f],
                [0x671, 0x6d5],
                [0x6e5, 0x6e6],
                [0x6ee, 0x6ef],
                [0x6fa, 0x70d],
                [0x712, 0x72f],
                [0x74d, 0x7a5],
                [0x7c0, 0x7ea],
                [0x7f4, 0x7f5],
                [0x800, 0x815],
                [0x830, 0x83e],
                [0x840, 0x858],
            ]
        );
    } elseif ($c === 0x200f) {
        $isRtl = true;
    } elseif ($c >= 0xfb1d) {
        // Right-to-left code points from 0xfb1d upwards.
        $isRtl = $isListed(
            $c,
            [
                0xfb1d, 0xfb3e,
                0x10808, 0x1083c, 0x1093f, 0x10a00,
            ],
            [
                [0xfb1f, 0xfb28],
                [0xfb2a, 0xfb36],
                [0xfb38, 0xfb3c],
                [0xfb40, 0xfb41],
                [0xfb43, 0xfb44],
                [0xfb46, 0xfbc1],
                [0xfbd3, 0xfd3d],
                [0xfd50, 0xfd8f],
                [0xfd92, 0xfdc7],
                [0xfdf0, 0xfdfc],
                [0xfe70, 0xfe74],
                [0xfe76, 0xfefc],
                [0x10800, 0x10805],
                [0x1080a, 0x10835],
                [0x10837, 0x10838],
                [0x1083f, 0x10855],
                [0x10857, 0x1085f],
                [0x10900, 0x1091b],
                [0x10920, 0x10939],
                [0x10a10, 0x10a13],
                [0x10a15, 0x10a17],
                [0x10a19, 0x10a33],
                [0x10a40, 0x10a47],
                [0x10a50, 0x10a58],
                [0x10a60, 0x10a7f],
                [0x10b00, 0x10b35],
                [0x10b40, 0x10b55],
                [0x10b58, 0x10b72],
                [0x10b78, 0x10b7f],
            ]
        );
    } else {
        $isRtl = false;
    }

    return $isRtl ? 'RTL' : 'LTR';
}
2090
|
|
|
|
2091
|
|
|
/** |
2092
|
|
|
* Check for php-support. |
2093
|
|
|
* |
2094
|
|
|
* @param string|null $key |
2095
|
|
|
* |
2096
|
|
|
* @return mixed |
2097
|
|
|
* Return the full support-"array", if $key === null<br> |
2098
|
|
|
* return bool-value, if $key is used and available<br> |
2099
|
|
|
* otherwise return <strong>null</strong> |
2100
|
|
|
*/ |
2101
|
27 |
|
public static function getSupportInfo(string $key = null)
{
    if ($key !== null) {
        // Load the transliterator list on first use.
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // compatibility fix for old versions
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        // Unknown keys (and keys holding null) yield null.
        return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
    }

    // No key requested: return the support array as it currently is.
    return self::$SUPPORT;
}
2115
|
|
|
|
2116
|
|
|
/** |
2117
|
|
|
* Warning: this method only works for some file-types (png bmp gif jpg tiff rar zip midi exe pdf)
2118
|
|
|
* if you need more supported types, please use e.g. "finfo" |
2119
|
|
|
* |
2120
|
|
|
* @param string $str |
2121
|
|
|
* @param array $fallback with this keys: 'ext', 'mime', 'type' |
2122
|
|
|
* |
2123
|
|
|
* @return array |
2124
|
|
|
* with this keys: 'ext', 'mime', 'type' |
2125
|
|
|
*/ |
2126
|
39 |
|
public static function get_file_type(
    string $str,
    array $fallback = [
        'ext'  => null,
        'mime' => 'application/octet-stream',
        'type' => null,
    ]
): array {
    if ($str === '') {
        return $fallback;
    }

    // Only the first two bytes are inspected; shorter input cannot be classified.
    $prefix = \substr($str, 0, 2);
    if ($prefix === false || \strlen($prefix) !== 2) {
        return $fallback;
    }

    $bytes = \unpack('C2chars', $prefix);
    if ($bytes === false) {
        return $fallback;
    }

    // The lookup key is the decimal value of byte one directly followed by the
    // decimal value of byte two, e.g. "%P" (37, 80) => 3780.
    /** @noinspection OffsetOperationsInspection */
    $signature = (int) ($bytes['chars1'] . $bytes['chars2']);

    $knownSignatures = [
        3780   => ['ext' => 'pdf', 'mime' => 'application/pdf', 'type' => 'binary'],
        7790   => ['ext' => 'exe', 'mime' => 'application/octet-stream', 'type' => 'binary'],
        7784   => ['ext' => 'midi', 'mime' => 'audio/x-midi', 'type' => 'binary'],
        8075   => ['ext' => 'zip', 'mime' => 'application/zip', 'type' => 'binary'],
        8297   => ['ext' => 'rar', 'mime' => 'application/rar', 'type' => 'binary'],
        255216 => ['ext' => 'jpg', 'mime' => 'image/jpeg', 'type' => 'binary'],
        7173   => ['ext' => 'gif', 'mime' => 'image/gif', 'type' => 'binary'],
        7373   => ['ext' => 'tiff', 'mime' => 'image/tiff', 'type' => 'binary'],
        6677   => ['ext' => 'bmp', 'mime' => 'image/bmp', 'type' => 'binary'],
        13780  => ['ext' => 'png', 'mime' => 'image/png', 'type' => 'binary'],
    ];

    // Unknown signatures fall back to the caller supplied default.
    return $knownSignatures[$signature] ?? $fallback;
}
2224
|
|
|
|
2225
|
|
|
/** |
2226
|
|
|
* @param int $length <p>Length of the random string.</p> |
2227
|
|
|
* @param string $possibleChars [optional] <p>Characters string for the random selection.</p> |
2228
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
2229
|
|
|
* |
2230
|
|
|
* @return string |
2231
|
|
|
*/ |
2232
|
1 |
|
public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
{
    // "UTF-8" (as passed in) uses mbstring directly; every other charset is
    // normalized first and then handled by the class' own strlen()/substr().
    $useNativeMb = $encoding === 'UTF-8';

    if ($useNativeMb) {
        $poolSize = (int) \mb_strlen($possibleChars);
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $poolSize = (int) self::strlen($possibleChars, $encoding);
    }

    // Without any candidate characters there is nothing to pick from.
    if ($poolSize === 0) {
        return '';
    }

    $result = '';
    $picked = 0;

    //
    // add random chars
    //

    while ($picked < $length) {
        try {
            $position = \random_int(0, $poolSize - 1);
        } catch (\Exception $e) {
            // No usable source of randomness for random_int(): fall back to mt_rand().
            /** @noinspection RandomApiMigrationInspection */
            $position = \mt_rand(0, $poolSize - 1);
        }

        $char = $useNativeMb
            ? \mb_substr($possibleChars, $position, 1)
            : self::substr($possibleChars, $position, 1, $encoding);

        // A failed extraction does not count towards the requested length.
        if ($char === false) {
            continue;
        }

        $result .= $char;
        ++$picked;
    }

    return $result;
}
2286
|
|
|
|
2287
|
|
|
/** |
2288
|
|
|
* @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p> |
2289
|
|
|
* @param bool $md5 [optional] <p>Return the unique identifier as md5-hash? Default: true</p> |
2290
|
|
|
* |
2291
|
|
|
* @return string |
2292
|
|
|
*/ |
2293
|
1 |
|
public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
{
    // Entropy sources, concatenated in this exact order.
    $entropyParts = [
        \random_int(0, \mt_getrandmax()),
        \session_id(),
        $_SERVER['REMOTE_ADDR'] ?? '',
        $_SERVER['SERVER_ADDR'] ?? '',
        $entropyExtra,
    ];
    $uniqueHelper = \implode('', $entropyParts);

    // uniqid() uses the collected entropy as prefix (with "more entropy" enabled).
    $uniqueString = \uniqid($uniqueHelper, true);

    // Optionally condense the result into a 32 character md5 hex digest.
    return $md5
        ? \md5($uniqueString . $uniqueHelper)
        : $uniqueString;
}
2309
|
|
|
|
2310
|
|
|
/** |
2311
|
|
|
* alias for "UTF8::string_has_bom()" |
2312
|
|
|
* |
2313
|
|
|
* @param string $str |
2314
|
|
|
* |
2315
|
|
|
* @return bool |
2316
|
|
|
* |
2317
|
|
|
* @see UTF8::string_has_bom() |
2318
|
|
|
* @deprecated <p>use "UTF8::string_has_bom()"</p> |
2319
|
|
|
*/ |
2320
|
2 |
|
public static function hasBom(string $str): bool
{
    // Deprecated alias: the check itself lives in string_has_bom().
    $containsBom = self::string_has_bom($str);

    return $containsBom;
}
2324
|
|
|
|
2325
|
|
|
/** |
2326
|
|
|
* Returns true if the string contains a lower case char, false otherwise. |
2327
|
|
|
* |
2328
|
|
|
* @param string $str <p>The input string.</p> |
2329
|
|
|
* |
2330
|
|
|
* @return bool whether or not the string contains a lower case character |
2331
|
|
|
*/ |
2332
|
47 |
|
public static function has_lowercase(string $str): bool
{
    // Matches as soon as a lower case character occurs anywhere in the string.
    $pattern = '.*[[:lower:]]';

    // Without mbstring the class' own pattern matcher is used.
    if (self::$SUPPORT['mbstring'] !== true) {
        return self::str_matches_pattern($str, $pattern);
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \mb_ereg_match($pattern, $str);
}
2341
|
|
|
|
2342
|
|
|
/** |
2343
|
|
|
* Returns true if the string contains an upper case char, false otherwise. |
2344
|
|
|
* |
2345
|
|
|
* @param string $str <p>The input string.</p> |
2346
|
|
|
* |
2347
|
|
|
* @return bool whether or not the string contains an upper case character |
2348
|
|
|
*/ |
2349
|
12 |
|
public static function has_uppercase(string $str): bool
{
    // Matches as soon as an upper case character occurs anywhere in the string.
    $pattern = '.*[[:upper:]]';

    // Without mbstring the class' own pattern matcher is used.
    if (self::$SUPPORT['mbstring'] !== true) {
        return self::str_matches_pattern($str, $pattern);
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \mb_ereg_match($pattern, $str);
}
2358
|
|
|
|
2359
|
|
|
/** |
2360
|
|
|
* Converts a hexadecimal-value into an UTF-8 character. |
2361
|
|
|
* |
2362
|
|
|
* @param string $hexdec <p>The hexadecimal value.</p> |
2363
|
|
|
* |
2364
|
|
|
* @return false|string one single UTF-8 character |
2365
|
|
|
*/ |
2366
|
4 |
|
public static function hex_to_chr(string $hexdec)
{
    // Hexadecimal text -> numeric code point -> character.
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
}
2370
|
|
|
|
2371
|
|
|
/** |
2372
|
|
|
* Converts hexadecimal U+xxxx code point representation to integer. |
2373
|
|
|
* |
2374
|
|
|
* INFO: opposite to UTF8::int_to_hex() |
2375
|
|
|
* |
2376
|
|
|
* @param string $hexDec <p>The hexadecimal code point representation.</p> |
2377
|
|
|
* |
2378
|
|
|
* @return false|int the code point, or false on failure |
2379
|
|
|
*/ |
2380
|
2 |
|
public static function hex_to_int($hexDec)
{
    // Accept anything that can be cast to a string (the parameter is untyped).
    $hexDec = (string) $hexDec;

    if ($hexDec === '') {
        return false;
    }

    // Optional "\u" or "U+" prefix followed by 4 to 6 *hexadecimal* digits.
    // The digit class is limited to 0-9 / a-f / A-F: with a wider class, input
    // such as "zzzz" or "U+12gh" would pass the check and intval() would then
    // silently return 0 (or a partially parsed number) instead of false.
    if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexDec, $match)) {
        return \intval($match[1], 16);
    }

    return false;
}
2395
|
|
|
|
2396
|
|
|
/** |
2397
|
|
|
* alias for "UTF8::html_entity_decode()" |
2398
|
|
|
* |
2399
|
|
|
* @param string $str |
2400
|
|
|
* @param int $flags |
2401
|
|
|
* @param string $encoding |
2402
|
|
|
* |
2403
|
|
|
* @return string |
2404
|
|
|
* |
2405
|
|
|
* @see UTF8::html_entity_decode() |
2406
|
|
|
*/ |
2407
|
4 |
|
public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
{
    // Pure alias: all arguments are handed to html_entity_decode() unchanged.
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
}
2411
|
|
|
|
2412
|
|
|
/** |
2413
|
|
|
* Converts a UTF-8 string to a series of HTML numbered entities. |
2414
|
|
|
* |
2415
|
|
|
* INFO: opposite to UTF8::html_decode() |
2416
|
|
|
* |
2417
|
|
|
* @param string $str <p>The Unicode string to be encoded as numbered entities.</p> |
2418
|
|
|
* @param bool $keepAsciiChars [optional] <p>Keep ASCII chars.</p> |
2419
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
2420
|
|
|
* |
2421
|
|
|
* @return string HTML numbered entities |
2422
|
|
|
*/ |
2423
|
14 |
|
public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    // "UTF-8" and "CP850" are used as given; every other name is normalized first.
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
        // Start at 0x80 to leave ASCII untouched, or at 0x00 to encode everything.
        $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

        // A convmap entry is exactly [start, end, offset, mask]. The map must
        // contain a multiple of four integers: PHP >= 8.0 throws a ValueError
        // otherwise, while PHP 7 silently ignored a stray fifth element.
        $convmap = [$startCode, 0xfffff, 0, 0xfffff];

        if ($encoding === 'UTF-8') {
            return \mb_encode_numericentity($str, $convmap);
        }

        return \mb_encode_numericentity($str, $convmap, $encoding);
    }

    //
    // fallback via vanilla php
    //

    return \implode(
        '',
        \array_map(
            static function (string $chr) use ($keepAsciiChars, $encoding): string {
                return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
            },
            self::str_split($str)
        )
    );
}
2468
|
|
|
|
2469
|
|
|
/** |
2470
|
|
|
* UTF-8 version of html_entity_decode() |
2471
|
|
|
* |
2472
|
|
|
* The reason we are not using html_entity_decode() by itself is because |
2473
|
|
|
* while it is not technically correct to leave out the semicolon |
2474
|
|
|
* at the end of an entity most browsers will still interpret the entity |
2475
|
|
|
* correctly. html_entity_decode() does not convert entities without |
2476
|
|
|
* semicolons, so we are left with our own little solution here. Bummer. |
2477
|
|
|
* |
2478
|
|
|
* Convert all HTML entities to their applicable characters |
2479
|
|
|
* |
2480
|
|
|
* INFO: opposite to UTF8::html_encode() |
2481
|
|
|
* |
2482
|
|
|
* @see http://php.net/manual/en/function.html-entity-decode.php |
2483
|
|
|
* |
2484
|
|
|
* @param string $str <p> |
2485
|
|
|
* The input string. |
2486
|
|
|
* </p> |
2487
|
|
|
* @param int $flags [optional] <p> |
2488
|
|
|
* A bitmask of one or more of the following flags, which specify how to handle quotes |
2489
|
|
|
* and which document type to use. The default is ENT_COMPAT | ENT_HTML401. |
2490
|
|
|
* <table> |
2491
|
|
|
* Available <i>flags</i> constants |
2492
|
|
|
* <tr valign="top"> |
2493
|
|
|
* <td>Constant Name</td> |
2494
|
|
|
* <td>Description</td> |
2495
|
|
|
* </tr> |
2496
|
|
|
* <tr valign="top"> |
2497
|
|
|
* <td><b>ENT_COMPAT</b></td> |
2498
|
|
|
* <td>Will convert double-quotes and leave single-quotes alone.</td> |
2499
|
|
|
* </tr> |
2500
|
|
|
* <tr valign="top"> |
2501
|
|
|
* <td><b>ENT_QUOTES</b></td> |
2502
|
|
|
* <td>Will convert both double and single quotes.</td> |
2503
|
|
|
* </tr> |
2504
|
|
|
* <tr valign="top"> |
2505
|
|
|
* <td><b>ENT_NOQUOTES</b></td> |
2506
|
|
|
* <td>Will leave both double and single quotes unconverted.</td> |
2507
|
|
|
* </tr> |
2508
|
|
|
* <tr valign="top"> |
2509
|
|
|
* <td><b>ENT_HTML401</b></td> |
2510
|
|
|
* <td> |
2511
|
|
|
* Handle code as HTML 4.01. |
2512
|
|
|
* </td> |
2513
|
|
|
* </tr> |
2514
|
|
|
* <tr valign="top"> |
2515
|
|
|
* <td><b>ENT_XML1</b></td> |
2516
|
|
|
* <td> |
2517
|
|
|
* Handle code as XML 1. |
2518
|
|
|
* </td> |
2519
|
|
|
* </tr> |
2520
|
|
|
* <tr valign="top"> |
2521
|
|
|
* <td><b>ENT_XHTML</b></td> |
2522
|
|
|
* <td> |
2523
|
|
|
* Handle code as XHTML. |
2524
|
|
|
* </td> |
2525
|
|
|
* </tr> |
2526
|
|
|
* <tr valign="top"> |
2527
|
|
|
* <td><b>ENT_HTML5</b></td> |
2528
|
|
|
* <td> |
2529
|
|
|
* Handle code as HTML 5. |
2530
|
|
|
* </td> |
2531
|
|
|
* </tr> |
2532
|
|
|
* </table> |
2533
|
|
|
* </p> |
2534
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
2535
|
|
|
* |
2536
|
|
|
* @return string the decoded string |
2537
|
|
|
*/ |
2538
|
46 |
|
public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
{
    // Fast path: a string shorter than four bytes (examples: "&;" or "&x;")
    // or without any "&" cannot contain an entity to decode.
    if (
        !isset($str[3])
        ||
        \strpos($str, '&') === false
    ) {
        return $str;
    }

    // "UTF-8" and "CP850" are used as given; every other name is normalized first.
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
        $flags = \ENT_QUOTES | \ENT_HTML5;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // A convmap entry is exactly [start, end, offset, mask]. The map must
    // contain a multiple of four integers: PHP >= 8.0 throws a ValueError
    // otherwise, while PHP 7 silently ignored a stray fifth element.
    $convmap = [0x80, 0xfffff, 0, 0xfffff];

    // Repeat until a full pass changes nothing, so that nested / double
    // encoded entities (e.g. "&amp;#228;") are resolved completely.
    do {
        $str_compare = $str;

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                $str = \mb_decode_numericentity(
                    $str,
                    $convmap
                );
            } else {
                $str = \mb_decode_numericentity(
                    $str,
                    $convmap,
                    $encoding
                );
            }
        } else {
            $str = (string) \preg_replace_callback(
                "/&#\d{2,6};/",
                /**
                 * @param string[] $matches
                 *
                 * @return string
                 */
                static function (array $matches) use ($encoding): string {
                    $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
                    // Quote characters stay encoded here; whether they get
                    // decoded is left to html_entity_decode() and $flags below.
                    if ($returnTmp !== '"' && $returnTmp !== "'") {
                        return $returnTmp;
                    }

                    return $matches[0];
                },
                $str
            );
        }

        if (\strpos($str, '&') !== false) {
            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities
                // (numeric entities that lack the closing ";" get one appended)
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode(
                $str,
                $flags,
                $encoding
            );
        }
    } while ($str_compare !== $str);

    return $str;
}
2625
|
|
|
|
2626
|
|
|
/**
 * Escape a string for HTML output by delegating to "UTF8::htmlspecialchars()".
 *
 * Both quote styles are converted (ENT_QUOTES) and invalid code unit
 * sequences are substituted (ENT_SUBSTITUTE).
 *
 * @param string $str      <p>The string to escape.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 */
public static function html_escape(string $str, string $encoding = 'UTF-8'): string
{
    $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $flags, $encoding);
}
2642
|
|
|
|
2643
|
|
|
/**
 * Strip empty html-tag pairs from the string.
 *
 * A pair counts as empty when only optional whitespace sits between the
 * opening and the closing tag, e.g.: <tag></tag>
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string the string without empty tag pairs
 */
public static function html_stripe_empty_tags(string $str): string
{
    $emptyTagPattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

    return (string) \preg_replace($emptyTagPattern, '', $str);
}
2660
|
|
|
|
2661
|
|
|
/**
 * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
 *
 * After the native conversion, every backslash is replaced by its numeric
 * entity and the result is passed through "UTF8::html_encode()".
 *
 * @see http://php.net/manual/en/function.htmlentities.php
 *
 * @param string $str           <p>The input string.</p>
 * @param int    $flags         [optional] <p>
 *                              A bitmask of one or more of the following flags, which specify how to
 *                              handle quotes, invalid code unit sequences and the used document type.
 *                              The default is ENT_COMPAT | ENT_HTML401.
 *                              <br>
 *                              <b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.<br>
 *                              <b>ENT_QUOTES</b>: Will convert both double and single quotes.<br>
 *                              <b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.<br>
 *                              <b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
 *                              returning an empty string. Using this flag is discouraged as it may have
 *                              security implications.<br>
 *                              <b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
 *                              Replacement Character U+FFFD (UTF-8) or &#38;#FFFD; (otherwise) instead of
 *                              returning an empty string.<br>
 *                              <b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
 *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#38;#FFFD;
 *                              (otherwise) instead of leaving them as is. This may be useful, for
 *                              instance, to ensure the well-formedness of XML documents with embedded
 *                              external content.<br>
 *                              <b>ENT_HTML401</b>: Handle code as HTML 4.01.<br>
 *                              <b>ENT_XML1</b>: Handle code as XML 1.<br>
 *                              <b>ENT_XHTML</b>: Handle code as XHTML.<br>
 *                              <b>ENT_HTML5</b>: Handle code as HTML 5.
 *                              </p>
 * @param string $encoding      [optional] <p>
 *                              Like <b>htmlspecialchars</b>, <b>htmlentities</b> takes an optional third
 *                              argument <i>encoding</i> which defines encoding used in conversion.
 *                              Although this argument is technically optional, you are highly
 *                              encouraged to specify the correct value for your code.
 *                              </p>
 * @param bool   $double_encode [optional] <p>
 *                              When <i>double_encode</i> is turned off PHP will not encode existing
 *                              html entities. The default is to convert everything.
 *                              </p>
 *
 * @return string
 *                <p>
 *                The encoded string.
 *                <br><br>
 *                If the input <i>string</i> contains an invalid code unit
 *                sequence within the given <i>encoding</i> an empty string
 *                will be returned, unless either the <b>ENT_IGNORE</b> or
 *                <b>ENT_SUBSTITUTE</b> flags are set.
 *                </p>
 */
public static function htmlentities(
    string $str,
    int $flags = \ENT_COMPAT,
    string $encoding = 'UTF-8',
    bool $double_encode = true
): string {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * PHP doesn't replace a backslash with its html entity, since backslashes are
     * mostly used to escape characters when inserting into a database. As a decent
     * database layer takes care of that, the backslash is replaced by its numeric
     * html entity here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    // '\\' in a single-quoted string is ONE backslash; '&#92;' is its numeric entity.
    $str = \str_replace('\\', '&#92;', $str);

    return self::html_encode($str, true, $encoding);
}
2790
|
|
|
|
2791
|
|
|
/**
 * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
 *
 * INFO: Take a look at "UTF8::htmlentities()"
 *
 * @see http://php.net/manual/en/function.htmlspecialchars.php
 *
 * @param string $str           <p>The string being converted.</p>
 * @param int    $flags         [optional] <p>
 *                              A bitmask of one or more of the following flags, which specify how to
 *                              handle quotes, invalid code unit sequences and the used document type.
 *                              The default is ENT_COMPAT | ENT_HTML401.
 *                              <br>
 *                              <b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.<br>
 *                              <b>ENT_QUOTES</b>: Will convert both double and single quotes.<br>
 *                              <b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.<br>
 *                              <b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
 *                              returning an empty string. Using this flag is discouraged as it may have
 *                              security implications.<br>
 *                              <b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
 *                              Replacement Character U+FFFD (UTF-8) or &#38;#FFFD; (otherwise) instead of
 *                              returning an empty string.<br>
 *                              <b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
 *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#38;#FFFD;
 *                              (otherwise) instead of leaving them as is. This may be useful, for
 *                              instance, to ensure the well-formedness of XML documents with embedded
 *                              external content.<br>
 *                              <b>ENT_HTML401</b>: Handle code as HTML 4.01.<br>
 *                              <b>ENT_XML1</b>: Handle code as XML 1.<br>
 *                              <b>ENT_XHTML</b>: Handle code as XHTML.<br>
 *                              <b>ENT_HTML5</b>: Handle code as HTML 5.
 *                              </p>
 * @param string $encoding      [optional] <p>
 *                              Defines encoding used in conversion.
 *                              </p>
 *                              <p>
 *                              For the purposes of this function, the encodings
 *                              ISO-8859-1, ISO-8859-15, UTF-8, cp866, cp1251, cp1252, and
 *                              KOI8-R are effectively equivalent, provided the
 *                              <i>string</i> itself is valid for the encoding, as
 *                              the characters affected by <b>htmlspecialchars</b> occupy
 *                              the same positions in all of these encodings.
 *                              </p>
 * @param bool   $double_encode [optional] <p>
 *                              When <i>double_encode</i> is turned off PHP will not encode existing
 *                              html entities, the default is to convert everything.
 *                              </p>
 *
 * @return string
 *                <p>
 *                The converted string.
 *                <br><br>
 *                If the input <i>string</i> contains an invalid code unit
 *                sequence within the given <i>encoding</i> an empty string
 *                will be returned, unless either the <b>ENT_IGNORE</b> or
 *                <b>ENT_SUBSTITUTE</b> flags are set.
 *                </p>
 */
public static function htmlspecialchars(
    string $str,
    int $flags = \ENT_COMPAT,
    string $encoding = 'UTF-8',
    bool $double_encode = true
): string {
    // "UTF-8" and "CP850" are passed through as-is; anything else is normalized first.
    $isKnownEncoding = \in_array($encoding, ['UTF-8', 'CP850'], true);
    if (!$isKnownEncoding) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return \htmlspecialchars($str, $flags, $encoding, $double_encode);
}
2913
|
|
|
|
2914
|
|
|
/**
 * Tells whether the "iconv" extension is loaded.
 *
 * @return bool
 *              <strong>true</strong> if available, <strong>false</strong> otherwise
 */
public static function iconv_loaded(): bool
{
    $extension = 'iconv';

    return \extension_loaded($extension);
}
2924
|
|
|
|
2925
|
|
|
/**
 * Alias for "UTF8::decimal_to_chr()": the argument is forwarded unchanged.
 *
 * @param mixed $int
 *
 * @return string
 *
 * @see UTF8::decimal_to_chr()
 */
public static function int_to_chr($int): string
{
    // Pure delegation, no own logic.
    return self::decimal_to_chr($int);
}
2938
|
|
|
|
2939
|
|
|
/**
 * Converts Integer to hexadecimal U+xxxx code point representation.
 *
 * The hexadecimal value is lower-case (as produced by dechex()) and is
 * left-padded with zeros to at least four digits.
 *
 * INFO: opposite to UTF8::hex_to_int()
 *
 * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
 * @param string $pfix [optional] <p>Prefix put in front of the hex digits.</p>
 *
 * @return string the prefixed code point
 */
public static function int_to_hex(int $int, string $pfix = 'U+'): string
{
    // Shorter values are padded to four digits, longer ones are kept as they are.
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $digits;
}
2957
|
|
|
|
2958
|
|
|
/**
 * Tells whether the "IntlChar" class exists.
 *
 * @return bool
 *              <strong>true</strong> if available, <strong>false</strong> otherwise
 */
public static function intlChar_loaded(): bool
{
    $className = 'IntlChar';

    return \class_exists($className);
}
2968
|
|
|
|
2969
|
|
|
/**
 * Tells whether the "intl" extension is loaded.
 *
 * @return bool
 *              <strong>true</strong> if available, <strong>false</strong> otherwise
 */
public static function intl_loaded(): bool
{
    $extension = 'intl';

    return \extension_loaded($extension);
}
2979
|
|
|
|
2980
|
|
|
/**
 * Deprecated camelCase alias: forwards to "UTF8::is_ascii()".
 *
 * @param string $str
 *
 * @return bool
 *
 * @see UTF8::is_ascii()
 * @deprecated <p>use "UTF8::is_ascii()"</p>
 */
public static function isAscii(string $str): bool
{
    // Pure delegation, no own logic.
    return self::is_ascii($str);
}
2994
|
|
|
|
2995
|
|
|
/**
 * Deprecated camelCase alias: forwards to "UTF8::is_base64()".
 *
 * @param string $str
 *
 * @return bool
 *
 * @see UTF8::is_base64()
 * @deprecated <p>use "UTF8::is_base64()"</p>
 */
public static function isBase64($str): bool
{
    // Pure delegation, no own logic.
    return self::is_base64($str);
}
3009
|
|
|
|
3010
|
|
|
/**
 * Deprecated camelCase alias: forwards to "UTF8::is_binary()".
 *
 * @param mixed $str
 * @param bool  $strict
 *
 * @return bool
 *
 * @see UTF8::is_binary()
 * @deprecated <p>use "UTF8::is_binary()"</p>
 */
public static function isBinary($str, $strict = false): bool
{
    // Pure delegation, both arguments are passed through unchanged.
    return self::is_binary($str, $strict);
}
3025
|
|
|
|
3026
|
|
|
/**
 * Deprecated camelCase alias: forwards to "UTF8::is_bom()".
 *
 * @param string $utf8_chr
 *
 * @return bool
 *
 * @see UTF8::is_bom()
 * @deprecated <p>use "UTF8::is_bom()"</p>
 */
public static function isBom(string $utf8_chr): bool
{
    // Pure delegation, no own logic.
    return self::is_bom($utf8_chr);
}
3040
|
|
|
|
3041
|
|
|
/**
 * Deprecated camelCase alias: forwards to "UTF8::is_html()".
 *
 * @param string $str
 *
 * @return bool
 *
 * @see UTF8::is_html()
 * @deprecated <p>use "UTF8::is_html()"</p>
 */
public static function isHtml(string $str): bool
{
    // Pure delegation, no own logic.
    return self::is_html($str);
}
3055
|
|
|
|
3056
|
|
|
/**
 * Deprecated camelCase alias: forwards to "UTF8::is_json()".
 *
 * @param string $str
 *
 * @return bool
 *
 * @see UTF8::is_json()
 * @deprecated <p>use "UTF8::is_json()"</p>
 */
public static function isJson(string $str): bool
{
    // Pure delegation; the optional second argument of is_json() keeps its default.
    return self::is_json($str);
}
3070
|
|
|
|
3071
|
|
|
/**
 * Deprecated camelCase alias: forwards to "UTF8::is_utf16()".
 *
 * @param mixed $str
 *
 * @return false|int
 *                   <strong>false</strong> if it is not UTF-16,<br>
 *                   <strong>1</strong> for UTF-16LE,<br>
 *                   <strong>2</strong> for UTF-16BE
 *
 * @see UTF8::is_utf16()
 * @deprecated <p>use "UTF8::is_utf16()"</p>
 */
public static function isUtf16($str)
{
    // Pure delegation; the optional second argument of is_utf16() keeps its default.
    return self::is_utf16($str);
}
3088
|
|
|
|
3089
|
|
|
/**
 * Deprecated camelCase alias: forwards to "UTF8::is_utf32()".
 *
 * @param mixed $str
 *
 * @return false|int
 *                   <strong>false</strong> if it is not UTF-32,<br>
 *                   <strong>1</strong> for UTF-32LE,<br>
 *                   <strong>2</strong> for UTF-32BE
 *
 * @see UTF8::is_utf32()
 * @deprecated <p>use "UTF8::is_utf32()"</p>
 */
public static function isUtf32($str)
{
    // Pure delegation, no own logic.
    return self::is_utf32($str);
}
3106
|
|
|
|
3107
|
|
|
/**
 * Deprecated camelCase alias: forwards to "UTF8::is_utf8()".
 *
 * @param string $str
 * @param bool   $strict
 *
 * @return bool
 *
 * @see UTF8::is_utf8()
 * @deprecated <p>use "UTF8::is_utf8()"</p>
 */
public static function isUtf8($str, $strict = false): bool
{
    // Pure delegation, both arguments are passed through unchanged.
    return self::is_utf8($str, $strict);
}
3122
|
|
|
|
3123
|
|
|
/**
 * Tests the whole string against the POSIX class [[:alpha:]].
 *
 * The pattern uses "*", so an empty string is matched as well.
 *
 * @param string $str
 *
 * @return bool
 *              Whether or not $str contains only alphabetic chars
 */
public static function is_alpha(string $str): bool
{
    $pattern = '^[[:alpha:]]*$';

    // Without mbstring the check is handed to the class' own pattern matcher.
    if (self::$SUPPORT['mbstring'] !== true) {
        return self::str_matches_pattern($str, $pattern);
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \mb_ereg_match($pattern, $str);
}
3140
|
|
|
|
3141
|
|
|
/**
 * Tests the whole string against the POSIX class [[:alnum:]].
 *
 * The pattern uses "*", so an empty string is matched as well.
 *
 * @param string $str
 *
 * @return bool
 *              Whether or not $str contains only alphanumeric chars
 */
public static function is_alphanumeric(string $str): bool
{
    $pattern = '^[[:alnum:]]*$';

    // Without mbstring the check is handed to the class' own pattern matcher.
    if (self::$SUPPORT['mbstring'] !== true) {
        return self::str_matches_pattern($str, $pattern);
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \mb_ereg_match($pattern, $str);
}
3158
|
|
|
|
3159
|
|
|
/**
 * Checks if a string is 7 bit ASCII.
 *
 * An empty string counts as ASCII. Otherwise every byte must be one of
 * 0x09, 0x0A, 0x0D, 0x10, 0x13 or lie in the printable range 0x20-0x7E.
 *
 * NOTE(review): "\x10" and "\x13" look like decimal 10 / 13 written as hex
 * escapes; kept as-is, confirm the intention before changing the set.
 *
 * @param string $str <p>The string to check.</p>
 *
 * @return bool
 *              <strong>true</strong> if it is ASCII<br>
 *              <strong>false</strong> otherwise
 */
public static function is_ascii(string $str): bool
{
    // The pattern looks for the first byte OUTSIDE of the accepted set.
    return $str === ''
           ||
           \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str) !== 1;
}
3176
|
|
|
|
3177
|
|
|
/**
 * Returns true if the string is base64 encoded, false otherwise.
 *
 * The input is decoded in strict mode and encoded again; it is accepted
 * only when that round trip reproduces the input exactly.
 *
 * @param mixed|string $str                <p>The input string.</p>
 * @param bool         $emptyStringIsValid [optional] <p>Is an empty string valid base64 or not?</p>
 *
 * @return bool whether or not $str is base64 encoded
 */
public static function is_base64($str, $emptyStringIsValid = false): bool
{
    if ($str === '' && $emptyStringIsValid === false) {
        return false;
    }

    /**
     * @psalm-suppress RedundantConditionGivenDocblockType
     */
    if (!\is_string($str)) {
        return false;
    }

    $decoded = \base64_decode($str, true);
    if ($decoded === false) {
        return false;
    }

    return \base64_encode($decoded) === $str;
}
3202
|
|
|
|
3203
|
|
|
/**
 * Check if the input is binary... (is look like a hack).
 *
 * The input is cast to string and reported as binary when one of these
 * checks hits (in this order):
 * - it consists only of the characters "0" and "1"
 * - "UTF8::get_file_type()" reports the type "binary"
 * - more than 25% of its bytes are NUL bytes
 * - only with $strict: fileinfo reports the MIME encoding "binary"
 *
 * @param mixed $input
 * @param bool  $strict
 *
 * @throws \RuntimeException if $strict is used without ext-fileinfo
 *
 * @return bool
 */
public static function is_binary($input, bool $strict = false): bool
{
    $data = (string) $input;
    if ($data === '') {
        return false;
    }

    if (\preg_match('~^[01]+$~', $data)) {
        return true;
    }

    $fileType = self::get_file_type($data);
    if ($fileType['type'] === 'binary') {
        return true;
    }

    $byteCount = \strlen($data);
    $nullCount = \substr_count($data, "\x0", 0, $byteCount);
    if (($nullCount / $byteCount) > 0.25) {
        return true;
    }

    if (!$strict) {
        return false;
    }

    if (self::$SUPPORT['finfo'] === false) {
        throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    $mimeEncoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

    return $mimeEncoding === 'binary';
}
3247
|
|
|
|
3248
|
|
|
/**
 * Check if the file is binary.
 *
 * Only the first 512 bytes of the file are read and handed to
 * "UTF8::is_binary()" in strict mode. A file that cannot be opened or
 * yields no data is reported as not binary.
 *
 * @param string $file
 *
 * @return bool
 */
public static function is_binary_file($file): bool
{
    $handle = \fopen($file, 'rb');
    if (!\is_resource($handle)) {
        return false;
    }

    $chunk = \fread($handle, 512);
    \fclose($handle);

    if ($chunk === '') {
        return false;
    }

    return self::is_binary($chunk, true);
}
3272
|
|
|
|
3273
|
|
|
/**
 * Tests the whole string against the POSIX class [[:space:]].
 *
 * The pattern uses "*", so an empty string is matched as well.
 *
 * @param string $str
 *
 * @return bool
 *              Whether or not $str contains only whitespace characters
 */
public static function is_blank(string $str): bool
{
    $pattern = '^[[:space:]]*$';

    // Without mbstring the check is handed to the class' own pattern matcher.
    if (self::$SUPPORT['mbstring'] !== true) {
        return self::str_matches_pattern($str, $pattern);
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \mb_ereg_match($pattern, $str);
}
3290
|
|
|
|
3291
|
|
|
/**
 * Checks if the given string is equal to any "Byte Order Mark".
 *
 * The comparison is an exact match against the keys of the internal BOM
 * table; a string that merely starts with a BOM does not match.
 *
 * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return bool
 *              <strong>true</strong> if the $str is Byte Order Mark, <strong>false</strong> otherwise
 */
public static function is_bom($str): bool
{
    // The BOM strings are the KEYS of self::$BOM, so a key lookup replaces the
    // former by-reference loop (which needlessly turned the table's elements
    // into references). Non-string input can never equal a BOM string.
    return \is_string($str) && isset(self::$BOM[$str]);
}
3312
|
|
|
|
3313
|
|
|
/**
 * Determine whether the value is considered to be empty.
 *
 * A value is considered empty if it equals FALSE after boolean conversion,
 * e.g. "", "0", 0, null, false or an empty array.
 *
 * @param mixed $str
 *
 * @return bool whether or not $str is empty()
 */
public static function is_empty($str): bool
{
    // For an always-defined parameter, empty($str) is the same as !$str.
    return !$str;
}
3327
|
|
|
|
3328
|
|
|
/**
 * Tests the whole string against the POSIX class [[:xdigit:]].
 *
 * The pattern uses "*", so an empty string is matched as well.
 *
 * @param string $str
 *
 * @return bool
 *              Whether or not $str contains only hexadecimal chars
 */
public static function is_hexadecimal(string $str): bool
{
    $pattern = '^[[:xdigit:]]*$';

    // Without mbstring the check is handed to the class' own pattern matcher.
    if (self::$SUPPORT['mbstring'] !== true) {
        return self::str_matches_pattern($str, $pattern);
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \mb_ereg_match($pattern, $str);
}
3345
|
|
|
|
3346
|
|
|
/**
 * Check if the string contains any html-tags <lall>.
 *
 * The string is passed through "UTF8::emoji_encode()" first and then
 * searched for one opening, closing or self-closing tag.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return bool
 */
public static function is_html(string $str): bool
{
    if ($str === '') {
        return false;
    }

    $encoded = self::emoji_encode($str); // hack for emoji support :/

    $found = [];
    \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $encoded, $found);

    // A hit fills the match array; it stays empty otherwise.
    return $found !== [];
}
3368
|
|
|
|
3369
|
|
|
/**
 * Try to check if "$str" is an json-string.
 *
 * The string is decoded via "UTF8::json_decode()". A null result is only
 * accepted for the literal "null" (compared case-insensitively).
 *
 * @param string $str                              <p>The input string.</p>
 * @param bool   $onlyArrayOrObjectResultsAreValid [optional] <p>Only array and objects are valid json results.</p>
 *
 * @throws \RuntimeException if ext-json is not available
 *
 * @return bool
 */
public static function is_json(string $str, $onlyArrayOrObjectResultsAreValid = true): bool
{
    if ($str === '') {
        return false;
    }

    if (self::$SUPPORT['json'] === false) {
        throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);
    if ($decoded === null && \strtoupper($str) !== 'NULL') {
        return false;
    }

    $isContainer = \is_object($decoded) || \is_array($decoded);
    if ($onlyArrayOrObjectResultsAreValid === true && !$isContainer) {
        return false;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === \JSON_ERROR_NONE;
}
3405
|
|
|
|
3406
|
|
|
/**
 * Tests the whole string against the POSIX class [[:lower:]].
 *
 * The pattern uses "*", so an empty string is matched as well.
 *
 * @param string $str
 *
 * @return bool
 */
public static function is_lowercase(string $str): bool
{
    $pattern = '^[[:lower:]]*$';

    // Without mbstring the check is handed to the class' own pattern matcher.
    if (self::$SUPPORT['mbstring'] !== true) {
        return self::str_matches_pattern($str, $pattern);
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \mb_ereg_match($pattern, $str);
}
3420
|
|
|
|
3421
|
|
|
/**
 * Returns true if the string is serialized, false otherwise.
 *
 * The check unserializes the string. Class instantiation is disabled for
 * that call ("allowed_classes" => false), so serialized objects come back
 * as "__PHP_Incomplete_Class" and still count as serialized, while no
 * object of a real class is created from the input.
 *
 * @param string $str
 *
 * @return bool whether or not $str is serialized
 */
public static function is_serialized(string $str): bool
{
    if ($str === '') {
        return false;
    }

    // unserialize() returns false both on failure and for a serialized
    // boolean false, therefore 'b:0;' is handled explicitly.
    if ($str === 'b:0;') {
        return true;
    }

    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    return @\unserialize($str, ['allowed_classes' => false]) !== false;
}
3440
|
|
|
|
3441
|
|
|
/**
 * Returns true if the string contains only upper case chars, false
 * otherwise.
 *
 * The whole string is tested against the POSIX class [[:upper:]]; the
 * pattern uses "*", so an empty string is matched as well.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return bool
 *              Whether or not $str contains only upper case characters
 */
public static function is_uppercase(string $str): bool
{
    $pattern = '^[[:upper:]]*$';

    // Without mbstring the check is handed to the class' own pattern matcher.
    if (self::$SUPPORT['mbstring'] !== true) {
        return self::str_matches_pattern($str, $pattern);
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    return \mb_ereg_match($pattern, $str);
}
3459
|
|
|
|
3460
|
|
|
/**
 * Check if the string is UTF-16.
 *
 * The input (with any BOM removed) is decoded as UTF-16LE and as UTF-16BE.
 * For each candidate whose decoded form survives a re-encode/decode round
 * trip, the characters of the decoded text that also occur in the input are
 * counted. The candidate with the higher count wins; a tie means "not UTF-16".
 *
 * @param mixed $str                  <p>The input string.</p>
 * @param bool  $checkIfStringIsBinary <p>If true, a string that is_binary() does not
 *                                     report as binary is rejected right away.</p>
 *
 * @return false|int
 *                   <strong>false</strong> if it is not UTF-16,<br>
 *                   <strong>1</strong> for UTF-16LE,<br>
 *                   <strong>2</strong> for UTF-16BE
 */
public static function is_utf16($str, $checkIfStringIsBinary = true)
{
    $str = (string) $str;

    if ($checkIfStringIsBinary === true && self::is_binary($str, true) === false) {
        return false;
    }

    if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Character statistics of the raw input, computed lazily and shared by both candidates.
    $inputChars = [];
    $hits = [];

    foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
        $hits[$candidate] = 0;

        $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
        if (!$decoded) {
            continue;
        }

        // Only trust the decoding if it is stable across a full round trip.
        $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
        $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
        if ($roundTrip !== $decoded) {
            continue;
        }

        if (\count($inputChars) === 0) {
            $inputChars = self::count_chars($str, true, false);
        }

        foreach (self::count_chars($roundTrip) as $char => $unusedCount) {
            if (\in_array($char, $inputChars, true) === true) {
                ++$hits[$candidate];
            }
        }
    }

    if ($hits['UTF-16BE'] === $hits['UTF-16LE']) {
        return false;
    }

    return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
}
3537
|
|
|
|
3538
|
|
|
/**
 * Check if the string is UTF-32.
 *
 * The input (with any BOM removed) is decoded as UTF-32LE and as UTF-32BE.
 * For each candidate whose decoded form survives a re-encode/decode round
 * trip, the characters of the decoded text that also occur in the input are
 * counted. The candidate with the higher count wins; a tie means "not UTF-32".
 *
 * @param mixed $str                  <p>The input string.</p>
 * @param bool  $checkIfStringIsBinary <p>If true, a string that is_binary() does not
 *                                     report as binary is rejected right away.</p>
 *
 * @return false|int
 *                   <strong>false</strong> if it is not UTF-32,<br>
 *                   <strong>1</strong> for UTF-32LE,<br>
 *                   <strong>2</strong> for UTF-32BE
 */
public static function is_utf32($str, $checkIfStringIsBinary = true)
{
    $str = (string) $str;

    if ($checkIfStringIsBinary === true && self::is_binary($str, true) === false) {
        return false;
    }

    if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Character statistics of the raw input, computed lazily and shared by both candidates.
    $inputChars = [];
    $hits = [];

    foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
        $hits[$candidate] = 0;

        $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
        if (!$decoded) {
            continue;
        }

        // Only trust the decoding if it is stable across a full round trip.
        $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
        $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
        if ($roundTrip !== $decoded) {
            continue;
        }

        if (\count($inputChars) === 0) {
            $inputChars = self::count_chars($str, true, false);
        }

        foreach (self::count_chars($roundTrip) as $char => $unusedCount) {
            if (\in_array($char, $inputChars, true) === true) {
                ++$hits[$candidate];
            }
        }
    }

    if ($hits['UTF-32BE'] === $hits['UTF-32LE']) {
        return false;
    }

    return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
}
3615
|
|
|
|
3616
|
|
|
/**
 * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
 *
 * Arrays are checked element by element (recursively); an empty string is valid.
 * Overlong encodings, surrogates, code points above U+10FFFF, 5/6-octet forms
 * and sequences that are cut off at the end of the input are all rejected.
 *
 * @see http://hsivonen.iki.fi/php-utf8/
 *
 * @param string|string[] $str    <p>The string to be checked.</p>
 * @param bool            $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
 *
 * @return bool
 */
public static function is_utf8($str, bool $strict = false): bool
{
    if (\is_array($str) === true) {
        foreach ($str as $value) {
            if (self::is_utf8($value, $strict) === false) {
                return false;
            }
        }

        return true;
    }

    if ($str === '') {
        return true;
    }

    if ($strict === true) {
        $isBinary = self::is_binary($str, true);

        if ($isBinary && self::is_utf16($str, false) !== false) {
            return false;
        }

        if ($isBinary && self::is_utf32($str, false) !== false) {
            return false;
        }
    }

    if (self::pcre_utf8_support() !== true) {
        // If even just the first character can be matched, when the /u
        // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
        // invalid, nothing at all will match, even if the string contains
        // some valid sequences
        return \preg_match('/^.{1}/us', $str) === 1;
    }

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0; // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence

    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    // Byte-wise indexing below needs a real string.
    $str = (string) $str;
    $len = \strlen($str);
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; ++$i) {
        // self::$ORD is used as a lookup table from a single byte to its ordinal value.
        $in = self::$ORD[$str[$i]];

        if ($mState === 0) {
            // When mState is zero we expect either a US-ASCII character or the
            // first octet of a multi-octet sequence.
            if ((0x80 & $in) === 0) {
                // US-ASCII, pass straight through.
                $mBytes = 1;
            } elseif ((0xE0 & $in) === 0xC0) {
                // First octet of 2 octet sequence.
                $mUcs4 = ($in & 0x1F) << 6;
                $mState = 1;
                $mBytes = 2;
            } elseif ((0xF0 & $in) === 0xE0) {
                // First octet of 3 octet sequence.
                $mUcs4 = ($in & 0x0F) << 12;
                $mState = 2;
                $mBytes = 3;
            } elseif ((0xF8 & $in) === 0xF0) {
                // First octet of 4 octet sequence.
                $mUcs4 = ($in & 0x07) << 18;
                $mState = 3;
                $mBytes = 4;
            } elseif ((0xFC & $in) === 0xF8) {
                /* First octet of 5 octet sequence.
                 *
                 * This is illegal because the encoded codepoint must be either
                 * (a) not the shortest form or
                 * (b) outside the Unicode range of 0-0x10FFFF.
                 * Rather than trying to resynchronize, we will carry on until the end
                 * of the sequence and let the later error handling code catch it.
                 */
                $mUcs4 = ($in & 0x03) << 24;
                $mState = 4;
                $mBytes = 5;
            } elseif ((0xFE & $in) === 0xFC) {
                // First octet of 6 octet sequence, see comments for 5 octet sequence.
                $mUcs4 = ($in & 1) << 30;
                $mState = 5;
                $mBytes = 6;
            } else {
                // Current octet is neither in the US-ASCII range nor a legal first
                // octet of a multi-octet sequence.
                return false;
            }
        } elseif ((0xC0 & $in) === 0x80) {
            // Legal continuation octet: merge its 6 payload bits into the code point.
            $shift = ($mState - 1) * 6;
            $mUcs4 |= ($in & 0x0000003F) << $shift;

            // End of the multi-octet sequence: mUcs4 now holds the final code point.
            if (--$mState === 0) {
                if (
                    // From Unicode 3.1, non-shortest form is illegal
                    ($mBytes === 2 && $mUcs4 < 0x0080)
                    ||
                    ($mBytes === 3 && $mUcs4 < 0x0800)
                    ||
                    ($mBytes === 4 && $mUcs4 < 0x10000)
                    ||
                    ($mBytes > 4)
                    ||
                    // From Unicode 3.2, surrogate characters are illegal.
                    (($mUcs4 & 0xFFFFF800) === 0xD800)
                    ||
                    // Code points outside the Unicode range are illegal.
                    ($mUcs4 > 0x10FFFF)
                ) {
                    return false;
                }

                // reset for the next character
                $mUcs4 = 0;
                $mBytes = 1;
            }
        } else {
            // A continuation octet was expected but something else was found:
            // incomplete multi-octet sequence.
            return false;
        }
    }

    // If continuation octets are still pending, the input ended in the middle
    // of a multi-octet sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
}
3771
|
|
|
|
3772
|
|
|
/**
 * (PHP 5 >= 5.2.0, PECL json >= 1.2.0)<br/>
 * Decodes a JSON string.
 *
 * The input is passed through UTF8::filter() first and is then handed to
 * PHP's native json_decode().
 *
 * @see http://php.net/manual/en/function.json-decode.php
 *
 * @param string $json    <p>
 *                        The <i>json</i> string being decoded.
 *                        </p>
 *                        <p>
 *                        This function only works with UTF-8 encoded strings.
 *                        </p>
 *                        <p>PHP implements a superset of
 *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
 *                        only supports these values when they are nested inside an array or an object.
 *                        </p>
 * @param bool   $assoc   [optional] <p>
 *                        When <b>TRUE</b>, returned objects will be converted into
 *                        associative arrays.
 *                        </p>
 * @param int    $depth   [optional] <p>
 *                        User specified recursion depth.
 *                        </p>
 * @param int    $options [optional] <p>
 *                        Bitmask of JSON decode options, forwarded unchanged to json_decode().
 *                        </p>
 *
 * @throws \RuntimeException if ext-json is not available
 *
 * @return mixed
 *               The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
 *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
 *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
 *               is deeper than the recursion limit.
 */
public static function json_decode(
    string $json,
    bool $assoc = false,
    int $depth = 512,
    int $options = 0
) {
    $filtered = self::filter($json);

    if (self::$SUPPORT['json'] !== false) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filtered, $assoc, $depth, $options);
    }

    throw new \RuntimeException('ext-json: is not installed');
}
3822
|
|
|
|
3823
|
|
|
/**
 * (PHP 5 >= 5.2.0, PECL json >= 1.2.0)<br/>
 * Returns the JSON representation of a value.
 *
 * The value is passed through UTF8::filter() first and is then handed to
 * PHP's native json_encode().
 *
 * @see http://php.net/manual/en/function.json-encode.php
 *
 * @param mixed $value   <p>
 *                       The <i>value</i> being encoded. Can be any type except
 *                       a resource.
 *                       </p>
 *                       <p>
 *                       All string data must be UTF-8 encoded.
 *                       </p>
 *                       <p>PHP implements a superset of
 *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
 *                       only supports these values when they are nested inside an array or an object.
 *                       </p>
 * @param int   $options [optional] <p>
 *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
 *                       <b>JSON_HEX_TAG</b>,
 *                       <b>JSON_HEX_AMP</b>,
 *                       <b>JSON_HEX_APOS</b>,
 *                       <b>JSON_NUMERIC_CHECK</b>,
 *                       <b>JSON_PRETTY_PRINT</b>,
 *                       <b>JSON_UNESCAPED_SLASHES</b>,
 *                       <b>JSON_FORCE_OBJECT</b>,
 *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
 *                       constants is described on
 *                       the JSON constants page.
 *                       </p>
 * @param int   $depth   [optional] <p>
 *                       Set the maximum depth. Must be greater than zero.
 *                       </p>
 *
 * @throws \RuntimeException if ext-json is not available
 *
 * @return false|string
 *                      A JSON encoded <strong>string</strong> on success or<br>
 *                      <strong>FALSE</strong> on failure
 */
public static function json_encode($value, int $options = 0, int $depth = 512)
{
    $filtered = self::filter($value);

    if (self::$SUPPORT['json'] !== false) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($filtered, $options, $depth);
    }

    throw new \RuntimeException('ext-json: is not installed');
}
3872
|
|
|
|
3873
|
|
|
/**
 * Checks whether JSON is available on the server.
 *
 * Availability is detected by the presence of the json_decode() function.
 *
 * @return bool
 *              <strong>true</strong> if available, <strong>false</strong> otherwise
 */
public static function json_loaded(): bool
{
    $hasJsonDecode = \function_exists('json_decode');

    return $hasJsonDecode;
}
3883
|
|
|
|
3884
|
|
|
/**
 * Makes string's first char lowercase.
 *
 * For plain UTF-8 input without language or length constraints the native
 * mb_* functions are used directly; every other combination goes through
 * the class' own substr()/strtolower() helpers.
 *
 * @param string      $str                   <p>The input string</p>
 * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string the resulting string
 */
public static function lcfirst(
    string $str,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    if ($cleanUtf8 === true) {
        $str = self::clean($str);
    }

    if ($encoding === 'UTF-8') {
        $tail = (string) \mb_substr($str, 1);
        $head = (string) \mb_substr($str, 0, 1);

        // Fast path: no language-specific rules and no length constraint.
        if ($lang === null && $tryToKeepStringLength === false) {
            return \mb_strtolower($head) . $tail;
        }
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $tail = (string) self::substr($str, 1, null, $encoding);
        $head = (string) self::substr($str, 0, 1, $encoding);
    }

    $loweredHead = self::strtolower($head, $encoding, false, $lang, $tryToKeepStringLength);

    return $loweredHead . $tail;
}
3940
|
|
|
|
3941
|
|
|
/**
 * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
 *
 * @param string      $str
 * @param string      $encoding
 * @param bool        $cleanUtf8
 * @param string|null $lang
 * @param bool        $tryToKeepStringLength
 *
 * @return string
 *
 * @see UTF8::lcfirst()
 */
public static function lcword(
    string $str,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $lowered;
}
3963
|
|
|
|
3964
|
|
|
/**
 * Lowercase the first character of all words in the string.
 *
 * The string is split via UTF8::str_to_words(), each non-excluded word is
 * passed through UTF8::lcfirst(), and the pieces are joined back together.
 *
 * @param string      $str                   <p>The input string.</p>
 * @param string[]    $exceptions            [optional] <p>Exclusion for some words.</p>
 * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
 *                                           a new word.</p>
 * @param string      $encoding              [optional] <p>Set the charset.</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string
 */
public static function lcwords(
    string $str,
    array $exceptions = [],
    string $charlist = '',
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    // Compare with '' explicitly: a truthiness test would also treat the
    // string "0" as empty and wrongly return ''.
    if ($str === '') {
        return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;

    foreach ($words as &$word) {
        // Falsy fragments ('' and "0") have nothing to lowercase.
        if (!$word) {
            continue;
        }

        if (
            $useExceptions === false
            ||
            !\in_array($word, $exceptions, true)
        ) {
            $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }
    }
    // Break the reference so later use of $word cannot alter the last element.
    unset($word);

    return \implode('', $words);
}
4010
|
|
|
|
4011
|
|
|
/**
 * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
 *
 * @param string      $str
 * @param string      $encoding
 * @param bool        $cleanUtf8
 * @param string|null $lang
 * @param bool        $tryToKeepStringLength
 *
 * @return string
 *
 * @see UTF8::lcfirst()
 */
public static function lowerCaseFirst(
    string $str,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $lowered;
}
4033
|
|
|
|
4034
|
|
|
/**
 * Strip whitespace or other characters from beginning of a UTF-8 string.
 *
 * @param string      $str   <p>The string to be trimmed</p>
 * @param string|null $chars <p>Optional characters to be stripped; null or an empty string
 *                           means "strip whitespace"</p>
 *
 * @return string the string with unwanted characters stripped from the left
 */
public static function ltrim(string $str = '', string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    // Explicit comparison instead of a truthiness test, so that a char list
    // of "0" is honoured rather than silently replaced by whitespace trimming.
    if ($chars !== null && $chars !== '') {
        $pattern = '^[' . \preg_quote($chars, '/') . ']+';
    } else {
        $pattern = '^[\\s]+';
    }

    if (self::$SUPPORT['mbstring'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    return self::regex_replace($str, $pattern, '', '', '/');
}
4062
|
|
|
|
4063
|
|
|
/**
 * Returns the UTF-8 character with the maximum code point in the given data.
 *
 * An array argument is concatenated into one string before its code points
 * are examined.
 *
 * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
 *
 * @return string|null the character with the highest code point than others, returns null on failure or empty input
 */
public static function max($arg)
{
    $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($haystack, false);
    if (\count($codepoints) !== 0) {
        return self::chr(\max($codepoints));
    }

    return null;
}
4085
|
|
|
|
4086
|
|
|
/**
 * Calculates and returns the maximum number of bytes taken by any
 * UTF-8 encoded character in the given string.
 *
 * @param string $str <p>The original Unicode string.</p>
 *
 * @return int max byte lengths of the given chars, 0 if UTF8::chr_size_list() yields no entries
 */
public static function max_chr_width(string $str): int
{
    $sizes = self::chr_size_list($str);

    return \count($sizes) > 0 ? (int) \max($sizes) : 0;
}
4103
|
|
|
|
4104
|
|
|
/**
 * Checks whether mbstring is available on the server.
 *
 * Availability is detected via extension_loaded('mbstring').
 *
 * @return bool
 *              <strong>true</strong> if available, <strong>false</strong> otherwise
 */
public static function mbstring_loaded(): bool
{
    $isLoaded = \extension_loaded('mbstring');

    return $isLoaded;
}
4114
|
|
|
|
4115
|
|
|
/**
 * Returns the UTF-8 character with the minimum code point in the given data.
 *
 * An array argument is concatenated into one string before its code points
 * are examined.
 *
 * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
 *
 * @return string|null the character with the lowest code point than others, returns null on failure or empty input
 */
public static function min($arg)
{
    $haystack = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($haystack, false);
    if (\count($codepoints) !== 0) {
        return self::chr(\min($codepoints));
    }

    return null;
}
4137
|
|
|
|
4138
|
|
|
/**
 * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
 *
 * @param mixed $encoding
 * @param mixed $fallback
 *
 * @return mixed
 *
 * @see UTF8::normalize_encoding()
 * @deprecated <p>use "UTF8::normalize_encoding()"</p>
 */
public static function normalizeEncoding($encoding, $fallback = '')
{
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
}
4153
|
|
|
|
4154
|
|
|
/**
 * Normalize the encoding-"name" input.
 *
 * Well-known spellings are resolved immediately; names contained in the
 * class' encoding list are returned unchanged; everything else is
 * upper-cased and looked up (with all non-alphanumeric characters removed)
 * in a table of equivalent names. Results are memoized per input name.
 *
 * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
 * @param mixed $fallback <p>e.g.: UTF-8</p>
 *
 * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
 */
public static function normalize_encoding($encoding, $fallback = '')
{
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    $encoding = (string) $encoding;

    // Empty input (this also covers "0") yields the fallback.
    if (!$encoding) {
        return $fallback;
    }

    if (\in_array($encoding, ['UTF-8', 'UTF8'], true)) {
        return 'UTF-8';
    }

    if (\in_array($encoding, ['8BIT', 'BINARY'], true)) {
        return 'CP850';
    }

    if (\in_array($encoding, ['HTML', 'HTML-ENTITIES'], true)) {
        return 'HTML-ENTITIES';
    }

    // only a fallback, for non "strict_types" usage ...
    if (\in_array($encoding, ['1', '0'], true)) {
        return $fallback;
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
        return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
        self::$ENCODINGS = self::getData('encodings');
    }

    // Names from the encoding list are already in their final form.
    if (\in_array($encoding, self::$ENCODINGS, true)) {
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

        return $encoding;
    }

    $upperName = \strtoupper($encoding);
    // Lookup key: the upper-cased name reduced to ASCII letters and digits.
    $lookupKey = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $upperName);

    $equivalences = [
        // ISO-8859-1: Western European
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        // ISO-8859-2: Central European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2',
        // ISO-8859-3: Southern European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3',
        // ISO-8859-4: Northern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4',
        // ISO-8859-5: Cyrillic
        'ISO88595'    => 'ISO-8859-5',
        // ISO-8859-6: Arabic
        'ISO88596'    => 'ISO-8859-6',
        // ISO-8859-7: Greek
        'ISO88597'    => 'ISO-8859-7',
        // ISO-8859-8: Hebrew
        'ISO88598'    => 'ISO-8859-8',
        // ISO-8859-9: Turkish
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9',
        // ISO-8859-11: Thai
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11',
        // ISO-8859-10: Nordic
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10',
        // ISO-8859-13: Baltic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13',
        // ISO-8859-14: Celtic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14',
        // ISO-8859-15: Western European (with some extra chars e.g. €)
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15',
        // ISO-8859-16: Southeast European
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16',
        // Windows code pages
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        // Unicode transformation formats
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        // Raw byte "encodings"
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // Unknown names are returned upper-cased but otherwise untouched.
    $normalized = $equivalences[$lookupKey] ?? $upperName;

    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
}
4297
|
|
|
|
4298
|
|
|
/**
 * Standardize line ending to unix-like.
 *
 * Every "\r\n" pair and every remaining lone "\r" becomes a single "\n".
 *
 * @param string $str
 *
 * @return string
 */
public static function normalize_line_ending(string $str): string
{
    // strtr() always tries the longest key first, so a CRLF pair is replaced
    // as one unit and never turned into two line feeds.
    $lineEndings = [
        "\r\n" => "\n",
        "\r"   => "\n",
    ];

    return \strtr($str, $lineEndings);
}
4309
|
|
|
|
4310
|
|
|
/**
 * Normalize some MS Word special characters.
 *
 * Typographic quotes, guillemets, dashes and the ellipsis are replaced by
 * their plain ASCII counterparts.
 *
 * @param string $str <p>The string to be normalized.</p>
 *
 * @return string
 */
public static function normalize_msword(string $str): string
{
    if ($str === '') {
        return '';
    }

    // UTF-8 byte sequence => ASCII replacement
    $replacements = [
        "\xc2\xab"     => '"', // « (U+00AB)
        "\xc2\xbb"     => '"', // » (U+00BB)
        "\xe2\x80\x98" => "'", // ‘ (U+2018)
        "\xe2\x80\x99" => "'", // ’ (U+2019)
        "\xe2\x80\x9a" => "'", // ‚ (U+201A)
        "\xe2\x80\x9b" => "'", // ‛ (U+201B)
        "\xe2\x80\x9c" => '"', // “ (U+201C)
        "\xe2\x80\x9d" => '"', // ” (U+201D)
        "\xe2\x80\x9e" => '"', // „ (U+201E)
        "\xe2\x80\x9f" => '"', // ‟ (U+201F)
        "\xe2\x80\xb9" => "'", // ‹ (U+2039)
        "\xe2\x80\xba" => "'", // › (U+203A)
        "\xe2\x80\x93" => '-', // – (U+2013)
        "\xe2\x80\x94" => '-', // — (U+2014)
        "\xe2\x80\xa6" => '...', // … (U+2026)
    ];

    return \str_replace(\array_keys($replacements), \array_values($replacements), $str);
}
4361
|
|
|
|
4362
|
|
|
/**
 * Replace every character listed in the class' whitespace table with a
 * plain space and (by default) strip the bidirectional control characters.
 *
 * @param string $str                     <p>The string to be normalized.</p>
 * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
 * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep the bidirectional text chars
 *                                        (they are removed, not replaced, otherwise).</p>
 *
 * @return string
 */
public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
{
    if ($str === '') {
        return '';
    }

    // per-request caches of the flattened lookup tables
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    // two possible search lists: index 0 = with NBSP, index 1 = without NBSP
    $cacheKey = (int) $keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
        $whitespaceTable = self::$WHITESPACE_TABLE;

        if ($keepNonBreakingSpace === true) {
            unset($whitespaceTable['NO-BREAK SPACE']);
        }

        $WHITESPACE_CACHE[$cacheKey] = \array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
        if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
            $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
        }

        // bidi controls are dropped entirely
        $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
}
4403
|
|
|
|
4404
|
|
|
/**
 * Calculates the Unicode code point of the given character.
 *
 * INFO: opposite to UTF8::chr()
 *
 * Lookup order: per-request cache, the precomputed "ord" data table,
 * "IntlChar::ord()" (when available) and finally a manual decode of the
 * leading bytes.
 *
 * @param string $chr      <p>The character of which to calculate code point.</p>
 * @param string $encoding [optional] <p>Encoding of $chr; anything other than UTF-8 is converted
 *                         via UTF8::encode() before the lookup.</p>
 *
 * @return int
 *             Unicode code point of the given character,<br>
 *             0 when the manual fallback gets no bytes to decode (empty input)
 */
public static function ord($chr, string $encoding = 'UTF-8'): int
{
    static $CHAR_CACHE = [];

    $chr = (string) $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // the cache is keyed by the *original* character plus the encoding name
    $cacheKey = $chr . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
        return $CHAR_CACHE[$cacheKey];
    }

    // still not UTF-8 after normalization? -> convert the character itself
    if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
    }

    // 1) precomputed table (loaded lazily)
    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (isset(self::$ORD[$chr])) {
        return $CHAR_CACHE[$cacheKey] = self::$ORD[$chr];
    }

    // 2) "IntlChar" - a falsy result (null / 0) falls through to the manual decode
    if (self::$SUPPORT['intlChar'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $intlCodePoint = \IntlChar::ord($chr);
        if ($intlCodePoint) {
            return $CHAR_CACHE[$cacheKey] = $intlCodePoint;
        }
    }

    // 3) vanilla php: decode at most the first four bytes by hand
    //    (unpack('C*') returns a 1-based array of byte values)
    $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
    $leadByte = $bytes ? $bytes[1] : 0;

    if ($leadByte >= 0xF0 && isset($bytes[4])) {
        // four-byte sequence
        $codePoint = (int) ((($leadByte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    } elseif ($leadByte >= 0xE0 && isset($bytes[3])) {
        // three-byte sequence
        $codePoint = (int) ((($leadByte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    } elseif ($leadByte >= 0xC0 && isset($bytes[2])) {
        // two-byte sequence
        $codePoint = (int) ((($leadByte - 0xC0) << 6) + $bytes[2] - 0x80);
    } else {
        // single byte (or nothing at all)
        $codePoint = $leadByte;
    }

    return $CHAR_CACHE[$cacheKey] = $codePoint;
}
4489
|
|
|
|
4490
|
|
|
/**
 * Parses a query string into an array (written into the second parameter).
 *
 * WARNING: Unlike the one-argument form of "parse_str()", this method never
 *          creates variables in the current scope; the result is always
 *          delivered through $result.
 *
 * @see http://php.net/manual/en/function.parse-str.php
 *
 * @param string $str       <p>The input string.</p>
 * @param array  $result    <p>The result will be returned into this reference parameter.</p>
 * @param bool   $cleanUtf8 [optional] <p>Run UTF8::clean() on the string before parsing.</p>
 *
 * @return bool
 *              <strong>false</strong> if parsing failed or produced an empty $result
 */
public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
{
    if ($cleanUtf8 === true) {
        $str = self::clean($str);
    }

    // without mbstring: fall back to the native parser
    if (self::$SUPPORT['mbstring'] !== true) {
        /** @noinspection PhpVoidFunctionResultUsedInspection */
        \parse_str($str, $result);

        return $result !== [];
    }

    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && $result !== [];
}
4522
|
|
|
|
4523
|
|
|
/**
 * Checks whether the "u" pattern modifier (Unicode support in PCRE) works.
 *
 * @return bool
 *              <strong>true</strong> if support is available,<br>
 *              <strong>false</strong> otherwise
 */
public static function pcre_utf8_support(): bool
{
    // an empty pattern matches the empty subject (result 1) when "/u" compiles;
    // the warning of a failing compile is silenced on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @\preg_match('//u', '');

    return (bool) $probe;
}
4535
|
|
|
|
4536
|
|
|
/**
 * Create an array containing a range of UTF-8 characters.
 *
 * Each boundary is interpreted in this order: decimal digits => code point,
 * hexadecimal digits => code point via UTF8::hex_to_int(), anything else =>
 * code point of the character via UTF8::ord().
 *
 * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
 * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
 *
 * @throws \RuntimeException if ext-ctype is not available
 *
 * @return string[] empty when a boundary is falsy or resolves to code point 0
 */
public static function range($var1, $var2): array
{
    if (!$var1 || !$var2) {
        return [];
    }

    if (self::$SUPPORT['ctype'] === false) {
        throw new \RuntimeException('ext-ctype: is not installed');
    }

    // Shared boundary conversion. Both ctype checks get a string: passing
    // non-strings to ctype_*() is deprecated since PHP 8.1 and small
    // integers would be interpreted as ASCII codes.
    $toCodePoint = static function ($boundary): int {
        $boundaryAsString = (string) $boundary;

        /** @noinspection PhpComposerExtensionStubsInspection */
        if (\ctype_digit($boundaryAsString)) {
            return (int) $boundary;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        if (\ctype_xdigit($boundaryAsString)) {
            return (int) self::hex_to_int($boundary);
        }

        return self::ord($boundary);
    };

    $start = $toCodePoint($var1);
    if (!$start) {
        return [];
    }

    $end = $toCodePoint($var2);
    if (!$end) {
        return [];
    }

    return \array_map(
        static function (int $i): string {
            return (string) self::chr($i);
        },
        \range($start, $end)
    );
}
4587
|
|
|
|
4588
|
|
|
/**
 * Multi decode html entity & fix urlencoded-win1252-chars.
 *
 * Examples (as documented for this method):
 * 'test+test'                     => 'test+test'
 * 'D%FCsseldorf'                  => 'Düsseldorf'
 * 'D%C3%BCsseldorf'               => 'Düsseldorf'
 * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
 * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
 *
 * @param string $str          <p>The input string.</p>
 * @param bool   $multi_decode <p>Decode as often as possible (repeat until the string stops changing).</p>
 *
 * @return string
 */
public static function rawurldecode(string $str, bool $multi_decode = true): string
{
    if ($str === '') {
        return '';
    }

    // only strings containing one of these markers can hold anything to decode
    $mayBeEncoded = \strpos($str, '&') !== false
                    ||
                    \strpos($str, '%') !== false
                    ||
                    \strpos($str, '+') !== false
                    ||
                    \strpos($str, '\u') !== false;

    if (!$mayBeEncoded) {
        return self::fix_simple_utf8($str);
    }

    $str = self::urldecode_unicode_helper($str);

    while (true) {
        $beforePass = $str;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $entitiesDecoded = self::html_entity_decode(
            self::to_utf8($str),
            \ENT_QUOTES | \ENT_HTML5
        );
        $str = self::fix_simple_utf8(\rawurldecode($entitiesDecoded));

        // single pass requested, or a fixed point was reached
        if ($multi_decode !== true || $beforePass === $str) {
            break;
        }
    }

    return $str;
}
4645
|
|
|
|
4646
|
|
|
/**
 * Replaces all occurrences of $pattern in $str by $replacement.
 *
 * The pattern is always compiled with the "u" modifier; $options are
 * appended after it.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
 * @param string $replacement <p>The string to replace with.</p>
 * @param string $options     [optional] <p>Matching conditions to be used ("msr" is mapped to "ms").</p>
 * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
 *
 * @return string the result, or an empty string when "preg_replace()" fails
 */
public static function regex_replace(
    string $str,
    string $pattern,
    string $replacement,
    string $options = '',
    string $delimiter = '/'
): string {
    // "msr" is accepted as an alias for "ms"
    $modifiers = $options === 'msr' ? 'ms' : $options;

    // fallback for an empty / falsy delimiter
    $boundary = $delimiter ?: '/';

    $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

    return (string) \preg_replace($regex, $replacement, $str);
}
4679
|
|
|
|
4680
|
|
|
/**
 * Alias of "UTF8::remove_bom()".
 *
 * @param string $str
 *
 * @return string
 *
 * @see        UTF8::remove_bom()
 * @deprecated <p>use "UTF8::remove_bom()"</p>
 */
public static function removeBOM(string $str): string
{
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
}
4694
|
|
|
|
4695
|
|
|
/**
 * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
 *
 * Every entry of the class' BOM table is tested once, in table order,
 * against the (already shortened) string, so several different leading BOMs
 * are stripped one after the other.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string string without UTF-BOM
 */
public static function remove_bom(string $str): string
{
    if ($str === '') {
        return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
        $bomString = (string) $bomString;

        // strncmp() only looks at the first bytes; strpos() would scan the
        // whole string for every BOM candidate before reporting "not at 0".
        if (\strncmp($str, $bomString, \strlen($bomString)) !== 0) {
            continue;
        }

        $remainder = \substr($str, (int) $bomByteLength);
        if ($remainder === false) {
            // PHP < 8: substr() yields false when nothing is left after the BOM
            return '';
        }

        $str = $remainder;
    }

    return $str;
}
4724
|
|
|
|
4725
|
|
|
/**
 * Collapses consecutive repetitions of a string inside another string into
 * a single occurrence.
 *
 * If the base string is not valid UTF-8 the regular expression cannot run;
 * in that case the string is returned as it was instead of being emptied.
 *
 * @param string          $str  <p>The base string.</p>
 * @param string|string[] $what <p>String (or list of strings) whose repetitions are collapsed.</p>
 *
 * @return string the result string with removed duplicates
 */
public static function remove_duplicates(string $str, $what = ' '): string
{
    if (\is_string($what) === true) {
        $what = [$what];
    }

    if (\is_array($what) === true) {
        /** @noinspection ForeachSourceInspection */
        foreach ($what as $item) {
            $item = (string) $item;
            if ($item === '') {
                // nothing to collapse
                continue;
            }

            // - preg_quote() takes the bare delimiter ('/'), not delimiter + modifier
            // - '$1' (= one captured occurrence) is used as replacement so that
            //   needles containing "$0", "\\1", ... are not read as back-references
            $collapsed = \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);

            // null => PCRE error (e.g. invalid UTF-8): keep the string untouched
            if ($collapsed !== null) {
                $str = $collapsed;
            }
        }
    }

    return $str;
}
4748
|
|
|
|
4749
|
|
|
/**
 * Remove html via "strip_tags()" from the string.
 *
 * @param string $str
 * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
 *                              Default: '' (strip everything)</p>
 *
 * @return string
 */
public static function remove_html(string $str, string $allowableTags = ''): string
{
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
}
4763
|
|
|
|
4764
|
|
|
/**
 * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
 *
 * A "\r\n" pair counts as ONE break, i.e. it is replaced by a single
 * $replacement.
 *
 * @param string $str
 * @param string $replacement [optional] <p>Default is a empty string.</p>
 *
 * @return string the result, or an empty string when "preg_replace()" fails
 */
public static function remove_html_breaks(string $str, string $replacement = ''): string
{
    // NOTE: the previous pattern started with a stray "/" ("#/\r\n|...#"), so
    // CRLF only matched as a pair when preceded by a slash (which was deleted
    // with it) and was otherwise replaced twice, once for "\r" and once for "\n".
    return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
}
4776
|
|
|
|
4777
|
|
|
/**
 * Remove invisible (control) characters from a string.
 *
 * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
 *
 * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09)
 * are kept. The replacement is repeated until nothing matches anymore, so
 * sequences that only appear after a removal are caught as well.
 *
 * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
 *
 * @param string $str
 * @param bool   $url_encoded <p>Also remove the url-encoded forms (%00 - %1F, without %09, %0A, %0D).</p>
 * @param string $replacement <p>What to put in place of a removed sequence.</p>
 *
 * @return string
 */
public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
{
    $patterns = $url_encoded
        ? [
            '/%0[0-8bcefBCEF]/', // url encoded 00-08, 11, 12, 14, 15
            '/%1[0-9a-fA-F]/',   // url encoded 16-31
        ]
        : [];

    // raw bytes: 00-08, 11, 12, 14-31, 127
    $patterns[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S';

    $replacedInPass = 0;
    do {
        $str = (string) \preg_replace($patterns, $replacement, $str, -1, $replacedInPass);
    } while ($replacedInPass !== 0);

    return $str;
}
4810
|
|
|
|
4811
|
|
|
/**
 * Returns a new string with the prefix $substring removed, if present.
 *
 * @param string $str
 * @param string $substring <p>The prefix to remove.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string string without the prefix $substring
 */
public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
{
    // - compare against '' explicitly: a plain truthiness check would treat
    //   the prefix "0" as "no prefix given"
    // - strncmp() only inspects the leading bytes instead of searching the
    //   whole string like strpos() does
    if (
        $substring === ''
        ||
        \strncmp($str, $substring, \strlen($substring)) !== 0
    ) {
        return $str;
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            (int) \mb_strlen($substring)
        );
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    return (string) self::substr(
        $str,
        (int) self::strlen($substring, $encoding),
        null,
        $encoding
    );
}
4842
|
|
|
|
4843
|
|
|
/**
 * Returns a new string with the suffix $substring removed, if present.
 *
 * @param string $str
 * @param string $substring <p>The suffix to remove.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string string having a $str without the suffix $substring
 */
public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
{
    // compare against '' explicitly: a plain truthiness check would treat
    // the suffix "0" as "no suffix given"
    if (
        $substring === ''
        ||
        \substr($str, -\strlen($substring)) !== $substring
    ) {
        return $str;
    }

    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            0,
            (int) \mb_strlen($str) - (int) \mb_strlen($substring)
        );
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    return (string) self::substr(
        $str,
        0,
        (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
        $encoding
    );
}
4875
|
|
|
|
4876
|
|
|
/**
 * Replaces all occurrences of $search in $str by $replacement.
 *
 * @param string $str           <p>The input string.</p>
 * @param string $search        <p>The needle to search for.</p>
 * @param string $replacement   <p>The string to replace with.</p>
 * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @return string string after the replacements
 */
public static function replace(
    string $str,
    string $search,
    string $replacement,
    bool $caseSensitive = true
): string {
    // case-insensitive matching is delegated to UTF8::str_ireplace()
    if (!$caseSensitive) {
        return self::str_ireplace($search, $replacement, $str);
    }

    return \str_replace($search, $replacement, $str);
}
4898
|
|
|
|
4899
|
|
|
/**
 * Replaces all occurrences of every element of $search in $str by $replacement.
 *
 * @param string       $str           <p>The input string.</p>
 * @param array        $search        <p>The elements to search for.</p>
 * @param array|string $replacement   <p>The string (or per-element list of strings) to replace with.</p>
 * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @return string string after the replacements
 */
public static function replace_all(
    string $str,
    array $search,
    $replacement,
    bool $caseSensitive = true
): string {
    // case-insensitive matching is delegated to UTF8::str_ireplace()
    if (!$caseSensitive) {
        return self::str_ireplace($search, $replacement, $str);
    }

    return \str_replace($search, $replacement, $str);
}
4921
|
|
|
|
4922
|
|
|
/**
 * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
 *
 * @param string $str                <p>The input string</p>
 * @param string $replacementChar    <p>The replacement character (an empty string removes the chars).</p>
 * @param bool   $processInvalidUtf8 <p>Also convert invalid UTF-8 byte sequences.</p>
 *
 * @return string
 */
public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
{
    if ($str === '') {
        return '';
    }

    if ($processInvalidUtf8 === true) {
        if (self::$SUPPORT['mbstring'] === false) {
            // if there is no native support for "mbstring",
            // then we need to clean the string before ...
            $str = self::clean($str);
        }

        // "mb_substitute_character()" only accepts "none" / "long" / "entity"
        // or an integer code point - never the replacement string itself.
        // Invalid sequences are therefore turned into U+FFFD first and get
        // the real replacement from the str_replace() below.
        $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

        $save = \mb_substitute_character();

        try {
            \mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
        } finally {
            // never leak the temporary substitute setting
            \mb_substitute_character($save);
        }
    }

    return \str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacementChar,
            $replacementChar,
        ],
        $str
    );
}
4968
|
|
|
|
4969
|
|
|
/**
 * Strip whitespace or other characters from end of a UTF-8 string.
 *
 * @param string      $str   <p>The string to be trimmed.</p>
 * @param string|null $chars <p>Optional characters to be stripped; null or '' strips whitespace.</p>
 *
 * @return string the string with unwanted characters stripped from the right
 */
public static function rtrim(string $str = '', string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    // explicit null / '' check: a truthiness test would treat the char
    // list "0" as "not given" and strip whitespace instead
    if ($chars !== null && $chars !== '') {
        $pattern = '[' . \preg_quote($chars, '/') . ']+$';
    } else {
        $pattern = '[\\s]+$';
    }

    if (self::$SUPPORT['mbstring'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    return self::regex_replace($str, $pattern, '', '', '/');
}
4997
|
|
|
|
4998
|
|
|
/**
 * WARNING: Prints (echo) the detected native UTF-8 support (libs), e.g. for debugging.
 *
 * Output: one "key - value" line per support entry, wrapped in "<pre>".
 *
 * @psalm-suppress MissingReturnType
 */
public static function showSupport()
{
    $report = '';
    foreach (self::$SUPPORT as $feature => $state) {
        $report .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }

    echo '<pre>' . $report . '</pre>';
}
5012
|
|
|
|
5013
|
|
|
/**
 * Converts a UTF-8 character to a HTML numbered entity like "&#123;".
 *
 * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
 * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to return ASCII input unchanged.</p>
 * @param string $encoding       [optional] <p>Encoding of $char, forwarded to UTF8::ord()</p>
 *
 * @return string the HTML numbered entity ('' for empty input)
 */
public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
{
    if ($char === '') {
        return '';
    }

    $returnUnchanged = $keepAsciiChars === true && self::is_ascii($char) === true;
    if ($returnUnchanged) {
        return $char;
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
}
5038
|
|
|
|
5039
|
|
|
/**
 * Replace every run of $tabLength consecutive spaces with one tab character.
 *
 * @param string $str       <p>The input string.</p>
 * @param int    $tabLength <p>Number of spaces that make up one tab. Values below 1 leave the string untouched.</p>
 *
 * @return string
 */
public static function spaces_to_tabs(string $str, int $tabLength = 4): string
{
    if ($tabLength < 1) {
        // there is no such thing as an empty / negative run of spaces
        return $str;
    }

    // always derive the search string from $tabLength, so that it can not
    // drift away from the requested width (as hard-coded literals can)
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
}
5057
|
|
|
|
5058
|
|
|
/**
 * Alias of "UTF8::str_split()".
 *
 * @param string|string[] $str
 * @param int             $length
 * @param bool            $cleanUtf8
 *
 * @return string[]
 *
 * @see UTF8::str_split()
 */
public static function split(
    $str,
    int $length = 1,
    bool $cleanUtf8 = false
): array {
    $chunks = self::str_split($str, $length, $cleanUtf8);

    return $chunks;
}
5076
|
|
|
|
5077
|
|
|
/**
 * Alias of "UTF8::str_starts_with()".
 *
 * @param string $haystack
 * @param string $needle
 *
 * @return bool
 *
 * @see UTF8::str_starts_with()
 */
public static function str_begins(string $haystack, string $needle): bool
{
    $startsWithNeedle = self::str_starts_with($haystack, $needle);

    return $startsWithNeedle;
}
5091
|
|
|
|
5092
|
|
|
/**
 * Returns a camelCase version of the string. Trims surrounding spaces,
 * capitalizes letters following digits, spaces, dashes and underscores,
 * and removes spaces, dashes, as well as underscores.
 *
 * @param string      $str                   <p>The input string.</p>
 * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr
 *                                           (forwarded to UTF8::lcfirst() / UTF8::strtoupper())</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string
 */
public static function str_camelize(
    string $str,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    if ($cleanUtf8 === true) {
        $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // lower-case the first character, then drop leading dashes / underscores
    $str = self::lcfirst(\trim($str), $encoding, false, $lang, $tryToKeepStringLength);
    $str = (string) \preg_replace('/^[-_]+/', '', $str);

    // plain "mb_strtoupper()" is sufficient when no language specific
    // handling and no length preservation was requested
    $useMbFunction = $lang === null && $tryToKeepStringLength === false;

    // shared upper-casing step of both replacement passes
    $toUpper = static function (string $text, bool $clean) use ($useMbFunction, $encoding, $lang, $tryToKeepStringLength): string {
        if ($useMbFunction !== true) {
            return self::strtoupper($text, $encoding, $clean, $lang, $tryToKeepStringLength);
        }

        return $encoding === 'UTF-8'
            ? \mb_strtoupper($text)
            : \mb_strtoupper($text, $encoding);
    };

    // pass 1: remove separator runs and upper-case the character after them
    $str = (string) \preg_replace_callback(
        '/[-_\\s]+(.)?/u',
        /**
         * @param array $match
         *
         * @return string
         */
        static function (array $match) use ($toUpper): string {
            // no following character (separator at the very end) => just drop the run
            return isset($match[1]) ? $toUpper($match[1], false) : '';
        },
        $str
    );

    // pass 2: upper-case each digit run together with the character after it
    return (string) \preg_replace_callback(
        '/[\\p{N}]+(.)?/u',
        /**
         * @param array $match
         *
         * @return string
         */
        static function (array $match) use ($toUpper, $cleanUtf8): string {
            return $toUpper($match[0], $cleanUtf8);
        },
        $str
    );
}
5177
|
|
|
|
5178
|
|
|
/** |
5179
|
|
|
* Returns the string with the first letter of each word capitalized, |
5180
|
|
|
* except for when the word is a name which shouldn't be capitalized. |
5181
|
|
|
* |
5182
|
|
|
* @param string $str |
5183
|
|
|
* |
5184
|
|
|
* @return string string with $str capitalized |
5185
|
|
|
*/ |
5186
|
1 |
|
public static function str_capitalize_name(string $str): string
{
    // Collapse whitespace first, then run the helper once per delimiter: space, then hyphen.
    $collapsed = self::collapse_whitespace($str);
    $spaceParts = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($spaceParts, '-');
}
5196
|
|
|
|
5197
|
|
|
/** |
5198
|
|
|
* Returns true if the string contains $needle, false otherwise. By default |
5199
|
|
|
* the comparison is case-sensitive, but can be made insensitive by setting |
5200
|
|
|
* $caseSensitive to false. |
5201
|
|
|
* |
5202
|
|
|
* @param string $haystack <p>The input string.</p> |
5203
|
|
|
* @param string $needle <p>Substring to look for.</p> |
5204
|
|
|
* @param bool $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p> |
5205
|
|
|
* |
5206
|
|
|
* @return bool whether or not $haystack contains $needle |
5207
|
|
|
*/ |
5208
|
21 |
|
public static function str_contains(
    string $haystack,
    string $needle,
    bool $caseSensitive = true
): bool {
    // Byte-wise search when case matters, multibyte case-insensitive search otherwise.
    $position = $caseSensitive
        ? \strpos($haystack, $needle)
        : \mb_stripos($haystack, $needle);

    return $position !== false;
}
5219
|
|
|
|
5220
|
|
|
/** |
5221
|
|
|
* Returns true if the string contains all $needles, false otherwise. By |
5222
|
|
|
* default the comparison is case-sensitive, but can be made insensitive by |
5223
|
|
|
* setting $caseSensitive to false. |
5224
|
|
|
* |
5225
|
|
|
* @param string $haystack <p>The input string.</p> |
5226
|
|
|
* @param array $needles <p>SubStrings to look for.</p> |
5227
|
|
|
* @param bool $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p> |
5228
|
|
|
* |
5229
|
|
|
* @return bool whether or not $haystack contains $needle |
5230
|
|
|
*/ |
5231
|
44 |
|
public static function str_contains_all(
    string $haystack,
    array $needles,
    bool $caseSensitive = true
): bool {
    // An empty haystack or an empty needle list never counts as "contains all".
    if ($haystack === '' || $needles === []) {
        return false;
    }

    foreach ($needles as $needle) {
        // Compare as string so that "0" is a valid needle; only a truly empty needle fails.
        $needle = (string) $needle;
        if ($needle === '') {
            return false;
        }

        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        // Fail on the first missing needle; otherwise keep checking the remaining ones
        // (returning here unconditionally would only ever test the first needle).
        if ($position === false) {
            return false;
        }
    }

    return true;
}
5255
|
|
|
|
5256
|
|
|
/** |
5257
|
|
|
* Returns true if the string contains any $needles, false otherwise. By |
5258
|
|
|
* default the comparison is case-sensitive, but can be made insensitive by |
5259
|
|
|
* setting $caseSensitive to false. |
5260
|
|
|
* |
5261
|
|
|
* @param string $haystack <p>The input string.</p> |
5262
|
|
|
* @param array $needles <p>SubStrings to look for.</p> |
5263
|
|
|
* @param bool $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p> |
5264
|
|
|
* |
5265
|
|
|
* @return bool |
5266
|
|
|
* Whether or not $str contains $needle |
5267
|
|
|
*/ |
5268
|
46 |
|
public static function str_contains_any(
    string $haystack,
    array $needles,
    bool $caseSensitive = true
): bool {
    // Nothing can be found in an empty haystack or with an empty needle list.
    if ($haystack === '' || $needles === []) {
        return false;
    }

    foreach ($needles as $needle) {
        // Compare as string so that "0" is a valid needle; only truly empty needles are skipped.
        $needle = (string) $needle;
        if ($needle === '') {
            continue;
        }

        $position = $caseSensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        // The first hit is enough.
        if ($position !== false) {
            return true;
        }
    }

    return false;
}
5298
|
|
|
|
5299
|
|
|
/** |
5300
|
|
|
* Returns a lowercase and trimmed string separated by dashes. Dashes are |
5301
|
|
|
* inserted before uppercase characters (with the exception of the first |
5302
|
|
|
* character of the string), and in place of spaces as well as underscores. |
5303
|
|
|
* |
5304
|
|
|
* @param string $str <p>The input string.</p> |
5305
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
5306
|
|
|
* |
5307
|
|
|
* @return string |
5308
|
|
|
*/ |
5309
|
19 |
|
public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
{
    // Dasherizing is delimiting with "-" as the separator.
    $dashed = self::str_delimit($str, '-', $encoding);

    return $dashed;
}
5313
|
|
|
|
5314
|
|
|
/** |
5315
|
|
|
* Returns a lowercase and trimmed string separated by the given delimiter. |
5316
|
|
|
* Delimiters are inserted before uppercase characters (with the exception |
5317
|
|
|
* of the first character of the string), and in place of spaces, dashes, |
5318
|
|
|
* and underscores. Alpha delimiters are not converted to lowercase. |
5319
|
|
|
* |
5320
|
|
|
* @param string $str <p>The input string.</p> |
5321
|
|
|
* @param string $delimiter <p>Sequence used to separate parts of the string.</p> |
5322
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
5323
|
|
|
* @param bool $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p> |
5324
|
|
|
* @param string|null $lang [optional] <p>Set the language for special cases: az, el, lt, |
5325
|
|
|
* tr</p> |
5326
|
|
|
* @param bool $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> |
5327
|
|
|
* ß</p> |
5328
|
|
|
* |
5329
|
|
|
* @return string |
5330
|
|
|
*/ |
5331
|
49 |
|
public static function str_delimit(
    string $str,
    string $delimiter,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    // Decide once which regex engine is used for both replacement steps.
    $hasMbstring = self::$SUPPORT['mbstring'] === true;
    $trimmed = \trim($str);

    // Step 1: put a "-" in front of every upper-case letter that is not at a word boundary.
    if ($hasMbstring) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
    } else {
        $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
    }

    // Step 2: lower-case; the plain mbstring call is only used for UTF-8 without language / length options.
    $useMbFunction = $lang === null && $tryToKeepStringLength === false;
    if ($useMbFunction === true && $encoding === 'UTF-8') {
        $str = \mb_strtolower($str);
    } else {
        $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    }

    // Step 3: replace every run of "-", "_" and whitespace with the requested delimiter.
    if ($hasMbstring) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
    }

    return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
}
5365
|
|
|
|
5366
|
|
|
/** |
5367
|
|
|
* Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32. |
5368
|
|
|
* |
5369
|
|
|
* @param string $str <p>The input string.</p> |
5370
|
|
|
* |
5371
|
|
|
* @return false|string |
5372
|
|
|
* The detected string-encoding e.g. UTF-8 or UTF-16BE,<br> |
5373
|
|
|
* otherwise it will return false e.g. for BINARY or not detected encoding. |
5374
|
|
|
*/ |
5375
|
30 |
|
public static function str_detect_encoding($str)
{
    $str = (string) $str;

    // Step 1: binary-looking input is only accepted as UTF-16 or UTF-32.
    if (self::is_binary($str, true) === true) {
        $utf16Result = self::is_utf16($str, false);
        if ($utf16Result === 1) {
            return 'UTF-16LE';
        }
        if ($utf16Result === 2) {
            return 'UTF-16BE';
        }

        $utf32Result = self::is_utf32($str, false);
        if ($utf32Result === 1) {
            return 'UTF-32LE';
        }
        if ($utf32Result === 2) {
            return 'UTF-32BE';
        }

        // Binary, but neither UTF-16 nor UTF-32.
        return false;
    }

    // Step 2: plain ASCII.
    if (self::is_ascii($str) === true) {
        return 'ASCII';
    }

    // Step 3: valid UTF-8.
    if (self::is_utf8($str) === true) {
        return 'UTF-8';
    }

    // Step 4: ask mbstring, using its configured detection order in strict mode.
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"
    if (self::$SUPPORT['mbstring'] === true) {
        // info: do not use the symfony polyfill here
        $detected = \mb_detect_encoding($str, \mb_detect_order(), true);
        if ($detected) {
            return $detected;
        }
    }

    // Step 5: try every known encoding; the first one that iconv round-trips unchanged wins.
    if (self::$ENCODINGS === null) {
        self::$ENCODINGS = self::getData('encodings');
    }

    foreach (self::$ENCODINGS as $candidate) {
        // INFO: //IGNORE but still throw notice
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
        if ($roundTrip === $str) {
            return $candidate;
        }
    }

    return false;
}
5452
|
|
|
|
5453
|
|
|
/** |
5454
|
|
|
* alias for "UTF8::str_ends_with()" |
5455
|
|
|
* |
5456
|
|
|
* @param string $haystack |
5457
|
|
|
* @param string $needle |
5458
|
|
|
* |
5459
|
|
|
* @return bool |
5460
|
|
|
* |
5461
|
|
|
* @see UTF8::str_ends_with() |
5462
|
|
|
*/ |
5463
|
|
|
public static function str_ends(string $haystack, string $needle): bool
{
    // Pure alias: forwards to str_ends_with().
    $endsWith = self::str_ends_with($haystack, $needle);

    return $endsWith;
}
5467
|
|
|
|
5468
|
|
|
/** |
5469
|
|
|
* Check if the string ends with the given substring. |
5470
|
|
|
* |
5471
|
|
|
* @param string $haystack <p>The string to search in.</p> |
5472
|
|
|
* @param string $needle <p>The substring to search for.</p> |
5473
|
|
|
* |
5474
|
|
|
* @return bool |
5475
|
|
|
*/ |
5476
|
9 |
|
public static function str_ends_with(string $haystack, string $needle): bool
{
    // Every string ends with the empty string.
    if ($needle === '') {
        return true;
    }

    // Compare the last strlen($needle) bytes of the haystack against the needle.
    $needleBytes = \strlen($needle);

    return $haystack !== '' && \substr($haystack, -$needleBytes) === $needle;
}
5488
|
|
|
|
5489
|
|
|
/** |
5490
|
|
|
* Returns true if the string ends with any of $substrings, false otherwise. |
5491
|
|
|
* |
5492
|
|
|
* - case-sensitive |
5493
|
|
|
* |
5494
|
|
|
* @param string $str <p>The input string.</p> |
5495
|
|
|
* @param string[] $substrings <p>Substrings to look for.</p> |
5496
|
|
|
* |
5497
|
|
|
* @return bool whether or not $str ends with $substring |
5498
|
|
|
*/ |
5499
|
7 |
|
public static function str_ends_with_any(string $str, array $substrings): bool
{
    if ($substrings === []) {
        return false;
    }

    foreach ($substrings as $suffix) {
        // Byte-wise, case-sensitive comparison of the string's tail with the candidate.
        $tail = \substr($str, -\strlen($suffix));
        if ($tail === $suffix) {
            return true;
        }
    }

    return false;
}
5513
|
|
|
|
5514
|
|
|
/** |
5515
|
|
|
* Ensures that the string begins with $substring. If it doesn't, it's |
5516
|
|
|
* prepended. |
5517
|
|
|
* |
5518
|
|
|
* @param string $str <p>The input string.</p> |
5519
|
|
|
* @param string $substring <p>The substring to add if not present.</p> |
5520
|
|
|
* |
5521
|
|
|
* @return string |
5522
|
|
|
*/ |
5523
|
10 |
|
public static function str_ensure_left(string $str, string $substring): string
{
    // Prefix check on the leading strlen($substring) bytes; an empty substring never counts as present.
    $hasPrefix = $substring !== ''
        && \strncmp($str, $substring, \strlen($substring)) === 0;

    return $hasPrefix ? $str : $substring . $str;
}
5535
|
|
|
|
5536
|
|
|
/** |
5537
|
|
|
* Ensures that the string ends with $substring. If it doesn't, it's appended. |
5538
|
|
|
* |
5539
|
|
|
* @param string $str <p>The input string.</p> |
5540
|
|
|
* @param string $substring <p>The substring to add if not present.</p> |
5541
|
|
|
* |
5542
|
|
|
* @return string |
5543
|
|
|
*/ |
5544
|
10 |
|
public static function str_ensure_right(string $str, string $substring): string
{
    // The suffix only counts as present when both strings are non-empty and the tail matches byte-wise.
    $hasSuffix = $str !== ''
        && $substring !== ''
        && \substr($str, -\strlen($substring)) === $substring;

    if ($hasSuffix) {
        return $str;
    }

    return $str . $substring;
}
5558
|
|
|
|
5559
|
|
|
/** |
5560
|
|
|
* Capitalizes the first word of the string, replaces underscores with |
5561
|
|
|
* spaces, and strips '_id'. |
5562
|
|
|
* |
5563
|
|
|
* @param string $str |
5564
|
|
|
* |
5565
|
|
|
* @return string |
5566
|
|
|
*/ |
5567
|
3 |
|
public static function str_humanize($str): string
{
    // Same order as the original array form: first remove "_id", then turn remaining "_" into spaces.
    $withoutIdSuffix = \str_replace('_id', '', $str);
    $spaced = \str_replace('_', ' ', $withoutIdSuffix);

    // Trim and upper-case the first character.
    return self::ucfirst(\trim($spaced));
}
5583
|
|
|
|
5584
|
|
|
/** |
5585
|
|
|
* alias for "UTF8::str_istarts_with()" |
5586
|
|
|
* |
5587
|
|
|
* @param string $haystack |
5588
|
|
|
* @param string $needle |
5589
|
|
|
* |
5590
|
|
|
* @return bool |
5591
|
|
|
* |
5592
|
|
|
* @see UTF8::str_istarts_with() |
5593
|
|
|
*/ |
5594
|
|
|
public static function str_ibegins(string $haystack, string $needle): bool
{
    // Pure alias: forwards to str_istarts_with().
    $startsWith = self::str_istarts_with($haystack, $needle);

    return $startsWith;
}
5598
|
|
|
|
5599
|
|
|
/** |
5600
|
|
|
* alias for "UTF8::str_iends_with()" |
5601
|
|
|
* |
5602
|
|
|
* @param string $haystack |
5603
|
|
|
* @param string $needle |
5604
|
|
|
* |
5605
|
|
|
* @return bool |
5606
|
|
|
* |
5607
|
|
|
* @see UTF8::str_iends_with() |
5608
|
|
|
*/ |
5609
|
|
|
public static function str_iends(string $haystack, string $needle): bool
{
    // Pure alias: forwards to str_iends_with().
    $endsWith = self::str_iends_with($haystack, $needle);

    return $endsWith;
}
5613
|
|
|
|
5614
|
|
|
/** |
5615
|
|
|
* Check if the string ends with the given substring, case insensitive. |
5616
|
|
|
* |
5617
|
|
|
* @param string $haystack <p>The string to search in.</p> |
5618
|
|
|
* @param string $needle <p>The substring to search for.</p> |
5619
|
|
|
* |
5620
|
|
|
* @return bool |
5621
|
|
|
*/ |
5622
|
12 |
|
public static function str_iends_with(string $haystack, string $needle): bool
{
    // Every string ends with the empty string.
    if ($needle === '') {
        return true;
    }

    if ($haystack === '') {
        return false;
    }

    // Take as many trailing bytes as the needle has and compare them case-insensitively.
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
}
5634
|
|
|
|
5635
|
|
|
/** |
5636
|
|
|
* Returns true if the string ends with any of $substrings, false otherwise. |
5637
|
|
|
* |
5638
|
|
|
* - case-insensitive |
5639
|
|
|
* |
5640
|
|
|
* @param string $str <p>The input string.</p> |
5641
|
|
|
* @param string[] $substrings <p>Substrings to look for.</p> |
5642
|
|
|
* |
5643
|
|
|
* @return bool whether or not $str ends with $substring |
5644
|
|
|
*/ |
5645
|
4 |
|
public static function str_iends_with_any(string $str, array $substrings): bool
{
    if ($substrings === []) {
        return false;
    }

    // The first candidate that matches case-insensitively at the end wins.
    foreach ($substrings as $suffix) {
        if (self::str_iends_with($str, $suffix)) {
            return true;
        }
    }

    return false;
}
5659
|
|
|
|
5660
|
|
|
/** |
5661
|
|
|
* Returns the index of the first occurrence of $needle in the string, |
5662
|
|
|
* and false if not found. Accepts an optional offset from which to begin |
5663
|
|
|
* the search. |
5664
|
|
|
* |
5665
|
|
|
* @param string $str <p>The input string.</p> |
5666
|
|
|
* @param string $needle <p>Substring to look for.</p> |
5667
|
|
|
* @param int $offset [optional] <p>Offset from which to search. Default: 0</p> |
5668
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
5669
|
|
|
* |
5670
|
|
|
* @return false|int |
5671
|
|
|
* The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong> |
5672
|
|
|
*/ |
5673
|
2 |
|
public static function str_iindex_first(
    string $str,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8'
) {
    // Thin wrapper: forwards all arguments unchanged to stripos().
    $position = self::stripos($str, $needle, $offset, $encoding);

    return $position;
}
5686
|
|
|
|
5687
|
|
|
/** |
5688
|
|
|
* Returns the index of the last occurrence of $needle in the string, |
5689
|
|
|
* and false if not found. Accepts an optional offset from which to begin |
5690
|
|
|
* the search. Offsets may be negative to count from the last character |
5691
|
|
|
* in the string. |
5692
|
|
|
* |
5693
|
|
|
* @param string $str <p>The input string.</p> |
5694
|
|
|
* @param string $needle <p>Substring to look for.</p> |
5695
|
|
|
* @param int $offset [optional] <p>Offset from which to search. Default: 0</p> |
5696
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
5697
|
|
|
* |
5698
|
|
|
* @return false|int |
5699
|
|
|
* The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong> |
5700
|
|
|
*/ |
5701
|
|
|
public static function str_iindex_last(
    string $str,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8'
) {
    // Thin wrapper: forwards all arguments unchanged to strripos().
    $position = self::strripos($str, $needle, $offset, $encoding);

    return $position;
}
5714
|
|
|
|
5715
|
|
|
/** |
5716
|
|
|
* Returns the index of the first occurrence of $needle in the string, |
5717
|
|
|
* and false if not found. Accepts an optional offset from which to begin |
5718
|
|
|
* the search. |
5719
|
|
|
* |
5720
|
|
|
* @param string $str <p>The input string.</p> |
5721
|
|
|
* @param string $needle <p>Substring to look for.</p> |
5722
|
|
|
* @param int $offset [optional] <p>Offset from which to search. Default: 0</p> |
5723
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
5724
|
|
|
* |
5725
|
|
|
* @return false|int |
5726
|
|
|
* The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong> |
5727
|
|
|
*/ |
5728
|
10 |
|
public static function str_index_first(
    string $str,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8'
) {
    // Thin wrapper: forwards all arguments unchanged to strpos().
    $position = self::strpos($str, $needle, $offset, $encoding);

    return $position;
}
5741
|
|
|
|
5742
|
|
|
/** |
5743
|
|
|
* Returns the index of the last occurrence of $needle in the string, |
5744
|
|
|
* and false if not found. Accepts an optional offset from which to begin |
5745
|
|
|
* the search. Offsets may be negative to count from the last character |
5746
|
|
|
* in the string. |
5747
|
|
|
* |
5748
|
|
|
* @param string $str <p>The input string.</p> |
5749
|
|
|
* @param string $needle <p>Substring to look for.</p> |
5750
|
|
|
* @param int $offset [optional] <p>Offset from which to search. Default: 0</p> |
5751
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
5752
|
|
|
* |
5753
|
|
|
* @return false|int |
5754
|
|
|
* The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong> |
5755
|
|
|
*/ |
5756
|
10 |
|
public static function str_index_last(
    string $str,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8'
) {
    // Thin wrapper: forwards all arguments unchanged to strrpos().
    $position = self::strrpos($str, $needle, $offset, $encoding);

    return $position;
}
5769
|
|
|
|
5770
|
|
|
/** |
5771
|
|
|
* Inserts $substring into the string at the $index provided. |
5772
|
|
|
* |
5773
|
|
|
* @param string $str <p>The input string.</p> |
5774
|
|
|
* @param string $substring <p>String to be inserted.</p> |
5775
|
|
|
* @param int $index <p>The index at which to insert the substring.</p> |
5776
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
5777
|
|
|
* |
5778
|
|
|
* @return string |
5779
|
|
|
*/ |
5780
|
8 |
|
public static function str_insert(
    string $str,
    string $substring,
    int $index,
    string $encoding = 'UTF-8'
): string {
    // Any encoding name other than the literal 'UTF-8' goes through the class' own helpers.
    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        // An index past the end leaves the string untouched.
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }

    // Fast path: plain mbstring calls.
    $length = (int) \mb_strlen($str);
    if ($index > $length) {
        return $str;
    }

    $head = (string) \mb_substr($str, 0, $index);
    $tail = (string) \mb_substr($str, $index, $length);

    return $head . $substring . $tail;
}
5809
|
|
|
|
5810
|
|
|
/** |
5811
|
|
|
* Case-insensitive and UTF-8 safe version of <function>str_replace</function>. |
5812
|
|
|
* |
5813
|
|
|
* @see http://php.net/manual/en/function.str-ireplace.php |
5814
|
|
|
* |
5815
|
|
|
* @param mixed $search <p> |
5816
|
|
|
* Every replacement with search array is |
5817
|
|
|
* performed on the result of previous replacement. |
5818
|
|
|
* </p> |
5819
|
|
|
* @param mixed $replace <p> |
5820
|
|
|
* </p> |
5821
|
|
|
* @param mixed $subject <p> |
5822
|
|
|
* If subject is an array, then the search and |
5823
|
|
|
* replace is performed with every entry of |
5824
|
|
|
* subject, and the return value is an array as |
5825
|
|
|
* well. |
5826
|
|
|
* </p> |
5827
|
|
|
* @param int $count [optional] <p> |
5828
|
|
|
* The number of matched and replaced needles will |
5829
|
|
|
* be returned in count which is passed by |
5830
|
|
|
* reference. |
5831
|
|
|
* </p> |
5832
|
|
|
* |
5833
|
|
|
* @return mixed a string or an array of replacements |
5834
|
|
|
*/ |
5835
|
29 |
|
public static function str_ireplace($search, $replace, $subject, &$count = null)
{
    // Build one case-insensitive, UTF-8 aware pattern per search term.
    $patterns = \array_map(
        static function ($term): string {
            $term = (string) $term;

            // An empty term gets a pattern that can never match, so it replaces nothing.
            return $term === ''
                ? '/^(?<=.)$/'
                : '/' . \preg_quote($term, '/') . '/ui';
        },
        (array) $search
    );

    // preg_replace() expands "$n" and "\n" inside the replacement as backreferences;
    // escape "\" and "$" so the replacement text is inserted literally, like str_ireplace() does.
    $escape = static function ($value): string {
        return \addcslashes((string) $value, '\\$');
    };
    $replacement = \is_array($replace)
        ? \array_map($escape, $replace)
        : $escape($replace);

    // $count receives the total number of replacements directly from preg_replace().
    return \preg_replace($patterns, $replacement, $subject, -1, $count);
}
5854
|
|
|
|
5855
|
|
|
/** |
5856
|
|
|
* Replaces $search from the beginning of string with $replacement. |
5857
|
|
|
* |
5858
|
|
|
* @param string $str <p>The input string.</p> |
5859
|
|
|
* @param string $search <p>The string to search for.</p> |
5860
|
|
|
* @param string $replacement <p>The replacement.</p> |
5861
|
|
|
* |
5862
|
|
|
* @return string string after the replacements |
5863
|
|
|
*/ |
5864
|
17 |
|
public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
{
    // With an empty search the replacement is appended to the input
    // (this also covers the empty-input cases: '' . $replacement).
    if ($search === '') {
        return $str . $replacement;
    }

    // Replace only when the (byte-wise, case-insensitive) match sits at position 0.
    if ($str !== '' && \stripos($str, $search) === 0) {
        return $replacement . \substr($str, \strlen($search));
    }

    return $str;
}
5886
|
|
|
|
5887
|
|
|
/** |
5888
|
|
|
* Replaces $search from the ending of string with $replacement. |
5889
|
|
|
* |
5890
|
|
|
* @param string $str <p>The input string.</p> |
5891
|
|
|
* @param string $search <p>The string to search for.</p> |
5892
|
|
|
* @param string $replacement <p>The replacement.</p> |
5893
|
|
|
* |
5894
|
|
|
* @return string string after the replacements |
5895
|
|
|
*/ |
5896
|
17 |
|
public static function str_ireplace_ending(string $str, string $search, string $replacement): string
{
    // With an empty search the replacement is appended to the input
    // (this also covers the empty-input cases: '' . $replacement).
    if ($search === '') {
        return $str . $replacement;
    }

    // Byte position at which the search string would have to start to sit at the very end.
    $offset = \strlen($str) - \strlen($search);

    // A search string longer than the input can never match. Checking this first also keeps
    // the offset inside the haystack: an out-of-range negative offset makes stripos()
    // emit a warning on PHP 7 and throw a ValueError on PHP 8.
    if ($offset >= 0 && \stripos($str, $search, $offset) !== false) {
        return \substr($str, 0, $offset) . $replacement;
    }

    return $str;
}
5918
|
|
|
|
5919
|
|
|
/** |
5920
|
|
|
* Check if the string starts with the given substring, case insensitive. |
5921
|
|
|
* |
5922
|
|
|
* @param string $haystack <p>The string to search in.</p> |
5923
|
|
|
* @param string $needle <p>The substring to search for.</p> |
5924
|
|
|
* |
5925
|
|
|
* @return bool |
5926
|
|
|
*/ |
5927
|
12 |
|
public static function str_istarts_with(string $haystack, string $needle): bool
{
    // Every string starts with the empty string.
    if ($needle === '') {
        return true;
    }

    // A non-empty needle cannot start an empty haystack; otherwise the match must sit at index 0.
    return $haystack !== '' && self::stripos($haystack, $needle) === 0;
}
5939
|
|
|
|
5940
|
|
|
/** |
5941
|
|
|
* Returns true if the string begins with any of $substrings, false otherwise. |
5942
|
|
|
* |
5943
|
|
|
* - case-insensitive |
5944
|
|
|
* |
5945
|
|
|
* @param string $str <p>The input string.</p> |
5946
|
|
|
* @param array $substrings <p>Substrings to look for.</p> |
5947
|
|
|
* |
5948
|
|
|
* @return bool whether or not $str starts with $substring |
5949
|
|
|
*/ |
5950
|
4 |
|
public static function str_istarts_with_any(string $str, array $substrings): bool
{
    // Nothing to match against.
    if ($str === '' || $substrings === []) {
        return false;
    }

    // The first candidate that matches case-insensitively at the start wins.
    foreach ($substrings as $prefix) {
        if (self::str_istarts_with($str, $prefix)) {
            return true;
        }
    }

    return false;
}
5968
|
|
|
|
5969
|
|
|
/** |
5970
|
|
|
* Gets the substring after the first occurrence of a separator. |
5971
|
|
|
* |
5972
|
|
|
* @param string $str <p>The input string.</p> |
5973
|
|
|
* @param string $separator <p>The string separator.</p> |
5974
|
|
|
* @param string $encoding [optional] <p>Default: 'UTF-8'</p> |
5975
|
|
|
* |
5976
|
|
|
* @return string |
5977
|
|
|
*/ |
5978
|
1 |
|
public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($separator === '' || $str === '') {
        return '';
    }

    // Locate the separator (case-insensitive) with the same encoding that is used for
    // slicing below, so the offset and the substring agree for non-UTF-8 input.
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    // Skip the separator itself and return everything behind it.
    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            $offset + (int) \mb_strlen($separator)
        );
    }

    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
6003
|
|
|
|
6004
|
|
|
/** |
6005
|
|
|
* Gets the substring after the last occurrence of a separator. |
6006
|
|
|
* |
6007
|
|
|
* @param string $str <p>The input string.</p> |
6008
|
|
|
* @param string $separator <p>The string separator.</p> |
6009
|
|
|
* @param string $encoding [optional] <p>Default: 'UTF-8'</p> |
6010
|
|
|
* |
6011
|
|
|
* @return string |
6012
|
|
|
*/ |
6013
|
1 |
|
public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($separator === '' || $str === '') {
        return '';
    }

    // Locate the last occurrence (case-insensitive) with the same encoding that is used
    // for slicing below, so the offset and the substring agree for non-UTF-8 input.
    $offset = self::strripos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    // Skip the separator itself and return everything behind it.
    if ($encoding === 'UTF-8') {
        return (string) \mb_substr(
            $str,
            $offset + (int) self::strlen($separator)
        );
    }

    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
6038
|
|
|
|
6039
|
|
|
/**
 * Gets the substring before the first occurrence of a separator.
 *
 * The separator position is obtained from self::str_iindex_first().
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string the text in front of the match, or an empty string when
 *                $str or $separator is empty or the separator is not found
 */
public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $separator === '') {
        return '';
    }

    $firstMatch = self::str_iindex_first($str, $separator);
    if ($firstMatch === false) {
        return '';
    }

    $head = $encoding === 'UTF-8'
        ? \mb_substr($str, 0, $firstMatch)
        : self::substr($str, 0, $firstMatch, $encoding);

    return (string) $head;
}
6065
|
|
|
|
6066
|
|
|
/**
 * Gets the substring before the last occurrence of a separator.
 *
 * For 'UTF-8' the native mb_strripos() / mb_substr() functions are used
 * directly, every other encoding goes through the class' own helpers.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string the text in front of the last match, or an empty string when
 *                $str or $separator is empty or the separator is not found
 */
public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $separator === '') {
        return '';
    }

    $isUtf8 = $encoding === 'UTF-8';

    $lastMatch = $isUtf8
        ? \mb_strripos($str, $separator)
        : self::strripos($str, $separator, 0, $encoding);

    if ($lastMatch === false) {
        return '';
    }

    $head = $isUtf8
        ? \mb_substr($str, 0, $lastMatch)
        : self::substr($str, 0, $lastMatch, $encoding);

    return (string) $head;
}
6097
|
|
|
|
6098
|
|
|
/**
 * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
 *
 * Thin wrapper around self::stristr() that turns its "false" result into an
 * empty string.
 *
 * @param string $str          <p>The input string.</p>
 * @param string $needle       <p>The string to look for.</p>
 * @param bool   $beforeNeedle [optional] <p>Default: false</p>
 * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string the matching part, or an empty string when $str or $needle
 *                is empty or nothing was found
 */
public static function str_isubstr_first(
    string $str,
    string $needle,
    bool $beforeNeedle = false,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $needle === '') {
        return '';
    }

    $found = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
}
6134
|
|
|
|
6135
|
|
|
/**
 * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
 *
 * Thin wrapper around self::strrichr() that turns its "false" result into an
 * empty string.
 *
 * @param string $str          <p>The input string.</p>
 * @param string $needle       <p>The string to look for.</p>
 * @param bool   $beforeNeedle [optional] <p>Default: false</p>
 * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string the matching part, or an empty string when $str or $needle
 *                is empty or nothing was found
 */
public static function str_isubstr_last(
    string $str,
    string $needle,
    bool $beforeNeedle = false,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $needle === '') {
        return '';
    }

    $found = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return $found === false ? '' : $found;
}
6166
|
|
|
|
6167
|
|
|
/**
 * Returns the last $n characters of the string.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $n        <p>Number of characters to retrieve from the end.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string the trailing characters, or an empty string when $str is
 *                empty or $n is not positive
 */
public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
{
    if ($n <= 0 || $str === '') {
        return '';
    }

    // a negative start offset counts from the end of the string
    $start = -$n;

    if ($encoding !== 'UTF-8') {
        $normalized = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, $start, null, $normalized);
    }

    return (string) \mb_substr($str, $start);
}
6190
|
|
|
|
6191
|
|
|
/**
 * Limit the number of characters in a string.
 *
 * A string that already fits into $length is returned untouched. Otherwise
 * it is cut so that the kept part plus $strAddOn is $length characters long;
 * when $strAddOn alone is longer than $length, nothing of $str is kept.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   [optional] <p>Default: 100</p>
 * @param string $strAddOn [optional] <p>Default: …</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string the limited string, or an empty string when $str is empty
 *                or $length is not positive
 */
public static function str_limit(
    string $str,
    int $length = 100,
    string $strAddOn = '…',
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $length <= 0) {
        return '';
    }

    if ($encoding === 'UTF-8') {
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        // Never hand a negative length to mb_substr(): it would be read as
        // "stop that many characters before the end" and keep far too much.
        $keep = \max(0, $length - (int) \mb_strlen($strAddOn));

        return ((string) \mb_substr($str, 0, $keep)) . $strAddOn;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ((int) self::strlen($str, $encoding) <= $length) {
        return $str;
    }

    // measure the add-on in the same encoding as the string itself
    $keep = \max(0, $length - (int) self::strlen($strAddOn, $encoding));

    return ((string) self::substr($str, 0, $keep, $encoding)) . $strAddOn;
}
6228
|
|
|
|
6229
|
|
|
/**
 * Limit the number of characters in a string, but also after the next word.
 *
 * The string is cut at $length characters and the (possibly partial) last
 * word is dropped, so the result ends on a word boundary followed by
 * $strAddOn. If no space is left to cut at, the first $length - 1 characters
 * are kept instead.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   [optional] <p>Default: 100</p>
 * @param string $strAddOn [optional] <p>Default: …</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string the limited string, or an empty string when $str is empty
 *                or $length is not positive
 */
public static function str_limit_after_word(
    string $str,
    int $length = 100,
    string $strAddOn = '…',
    string $encoding = 'UTF-8'
): string {
    if ($length <= 0 || $str === '') {
        return '';
    }

    $isUtf8 = $encoding === 'UTF-8';

    // short enough already -> nothing to do
    $charCount = $isUtf8
        ? (int) \mb_strlen($str)
        : (int) self::strlen($str, $encoding);
    if ($charCount <= $length) {
        return $str;
    }

    // the limit falls exactly behind a word: cut right in front of the space
    $boundaryChar = $isUtf8
        ? \mb_substr($str, $length - 1, 1)
        : self::substr($str, $length - 1, 1, $encoding);
    if ($boundaryChar === ' ') {
        $head = $isUtf8
            ? \mb_substr($str, 0, $length - 1)
            : self::substr($str, 0, $length - 1, $encoding);

        return ((string) $head) . $strAddOn;
    }

    $truncated = $isUtf8
        ? \mb_substr($str, 0, $length)
        : self::substr($str, 0, $length, $encoding);
    if (!$isUtf8 && $truncated === false) {
        return $strAddOn;
    }

    // drop everything from the last space on, i.e. the trailing (partial) word
    $lastSpace = \strrpos($truncated, ' ');
    $withoutLastWord = $lastSpace === false
        ? ''
        : (string) \substr($truncated, 0, $lastSpace);

    if ($withoutLastWord === '') {
        $head = $isUtf8
            ? \mb_substr($truncated, 0, $length - 1)
            : self::substr($truncated, 0, $length - 1, $encoding);

        return ((string) $head) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
}
6293
|
|
|
|
6294
|
|
|
/**
 * Returns the longest common prefix between the string and $otherStr.
 *
 * Both strings are compared character by character from the start until the
 * first difference or until the shorter string ends.
 *
 * @param string $str      <p>The input string.</p>
 * @param string $otherStr <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string the shared prefix, possibly empty
 */
public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
{
    $prefix = '';

    if ($encoding === 'UTF-8') {
        $limit = (int) \min(\mb_strlen($str), \mb_strlen($otherStr));

        $pos = 0;
        while ($pos < $limit) {
            $current = \mb_substr($str, $pos, 1);

            if ($current === false || $current !== \mb_substr($otherStr, $pos, 1)) {
                break;
            }

            $prefix .= $current;
            ++$pos;
        }

        return $prefix;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    $limit = (int) \min(
        self::strlen($str, $encoding),
        self::strlen($otherStr, $encoding)
    );

    $pos = 0;
    while ($pos < $limit) {
        $current = self::substr($str, $pos, 1, $encoding);

        if ($current === false || $current !== self::substr($otherStr, $pos, 1, $encoding)) {
            break;
        }

        $prefix .= $current;
        ++$pos;
    }

    return $prefix;
}
6352
|
|
|
|
6353
|
|
|
/**
 * Returns the longest common substring between the string and $otherStr.
 * In the case of ties, it returns that which occurs first.
 *
 * Uses the dynamic-programming solution of the longest common substring
 * problem (http://en.wikipedia.org/wiki/Longest_common_substring_problem).
 * Every character is extracted only once up front and only the previous
 * table row is kept, as a cell depends on nothing but its upper-left
 * neighbour.
 *
 * @param string $str
 * @param string $otherStr <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string the longest common substring, taken from $str; empty when
 *                the strings share no character or one of them is empty
 */
public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $otherStr === '') {
        return '';
    }

    if ($encoding === 'UTF-8') {
        $strLength = (int) \mb_strlen($str);
        $otherLength = (int) \mb_strlen($otherStr);
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $strLength = (int) self::strlen($str, $encoding);
        $otherLength = (int) self::strlen($otherStr, $encoding);
    }

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
        return '';
    }

    // evaluated after normalization, so an alias of UTF-8 takes the native path as well
    $isUtf8 = $encoding === 'UTF-8';

    // split both strings once instead of re-extracting characters inside the nested loop
    $strChars = [];
    for ($i = 0; $i < $strLength; ++$i) {
        $strChars[$i] = $isUtf8
            ? \mb_substr($str, $i, 1)
            : self::substr($str, $i, 1, $encoding);
    }

    $otherChars = [];
    for ($j = 0; $j < $otherLength; ++$j) {
        $otherChars[$j] = $isUtf8
            ? \mb_substr($otherStr, $j, 1)
            : self::substr($otherStr, $j, 1, $encoding);
    }

    $len = 0; // length of the best match so far
    $end = 0; // position in $str directly behind the best match

    // $previousRow[$j]: length of the common run ending at the previous
    // character of $str and the $j-th character of $otherStr
    $previousRow = \array_fill(0, $otherLength + 1, 0);

    for ($i = 1; $i <= $strLength; ++$i) {
        $strChar = $strChars[$i - 1];
        $currentRow = [0];

        for ($j = 1; $j <= $otherLength; ++$j) {
            if ($strChar === $otherChars[$j - 1]) {
                $run = $previousRow[$j - 1] + 1;
                $currentRow[$j] = $run;

                // strictly greater: on ties the earliest match wins
                if ($run > $len) {
                    $len = $run;
                    $end = $i;
                }
            } else {
                $currentRow[$j] = 0;
            }
        }

        $previousRow = $currentRow;
    }

    if ($isUtf8) {
        return (string) \mb_substr($str, $end - $len, $len);
    }

    return (string) self::substr($str, $end - $len, $len, $encoding);
}
6437
|
|
|
|
6438
|
|
|
/**
 * Returns the longest common suffix between the string and $otherStr.
 *
 * Both strings are compared character by character from the end until the
 * first difference or until the shorter string is used up.
 *
 * @param string $str
 * @param string $otherStr <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string the shared suffix, possibly empty
 */
public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
{
    if ($otherStr === '' || $str === '') {
        return '';
    }

    $suffix = '';

    if ($encoding === 'UTF-8') {
        $limit = (int) \min(
            \mb_strlen($str, $encoding),
            \mb_strlen($otherStr, $encoding)
        );

        // $fromEnd = 1 addresses the last character, 2 the one before it, ...
        $fromEnd = 1;
        while ($fromEnd <= $limit) {
            $current = \mb_substr($str, -$fromEnd, 1);

            if ($current === false || $current !== \mb_substr($otherStr, -$fromEnd, 1)) {
                break;
            }

            $suffix = $current . $suffix;
            ++$fromEnd;
        }

        return $suffix;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    $limit = (int) \min(
        self::strlen($str, $encoding),
        self::strlen($otherStr, $encoding)
    );

    $fromEnd = 1;
    while ($fromEnd <= $limit) {
        $current = self::substr($str, -$fromEnd, 1, $encoding);

        if ($current === false || $current !== self::substr($otherStr, -$fromEnd, 1, $encoding)) {
            break;
        }

        $suffix = $current . $suffix;
        ++$fromEnd;
    }

    return $suffix;
}
6499
|
|
|
|
6500
|
|
|
/**
 * Returns true if $str matches the supplied pattern, false otherwise.
 *
 * The pattern is given without delimiters and modifiers; it is wrapped into
 * "/.../u" here. A "/" inside the pattern may be written escaped or
 * unescaped, both forms match a literal slash.
 *
 * @param string $str     <p>The input string.</p>
 * @param string $pattern <p>Regex pattern to match against.</p>
 *
 * @return bool whether or not $str matches the pattern
 */
public static function str_matches_pattern(string $str, string $pattern): bool
{
    // "/" is the delimiter below, so a bare slash in $pattern would end the
    // regex early. Walk over the pattern, consuming every "\x" pair as one
    // unit (left untouched), and escape only the slashes that stand alone.
    $escaped = \preg_replace_callback(
        '~\\\\.|/~s',
        static function (array $match): string {
            return $match[0] === '/' ? '\\/' : $match[0];
        },
        $pattern
    );

    // keep the previous behavior if the escaping step itself failed
    if ($escaped === null) {
        $escaped = $pattern;
    }

    return (bool) \preg_match('/' . $escaped . '/u', $str);
}
6512
|
|
|
|
6513
|
|
|
/**
 * Returns whether or not a character exists at an index. Offsets may be
 * negative to count from the last character in the string. Implements
 * part of the ArrayAccess interface.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $offset   <p>The index to check.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return bool whether or not the index exists
 */
public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
{
    $charCount = (int) self::strlen($str, $encoding);

    // negative offsets reach back at most $charCount characters,
    // non-negative ones must stay below the character count
    return $offset < 0
        ? $charCount >= \abs($offset)
        : $charCount > $offset;
}
6535
|
|
|
|
6536
|
|
|
/**
 * Returns the character at the given index. Offsets may be negative to
 * count from the last character in the string. Implements part of the
 * ArrayAccess interface, and throws an OutOfBoundsException if the index
 * does not exist.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @throws \OutOfBoundsException if the positive or negative offset does not exist
 *
 * @return string the character at the specified index
 */
public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
{
    // measure in the requested encoding, so the bounds check agrees with
    // the char_at() lookup below
    $length = (int) self::strlen($str, $encoding);

    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
        throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
}
6565
|
|
|
|
6566
|
|
|
/**
 * Pad a UTF-8 string to given length with another string.
 *
 * A string that is already longer than $pad_length is returned unchanged.
 * With STR_PAD_BOTH the missing characters are split between both sides and
 * the right side receives the extra character if the amount is odd.
 *
 * @param string     $str        <p>The input string.</p>
 * @param int        $pad_length <p>The length of return string.</p>
 * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
 * @param int|string $pad_type   [optional] <p>
 *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
 *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
 *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
 *                               </p>
 * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
 *
 * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of the supported names
 *
 * @return string returns the padded string
 */
public static function str_pad(
    string $str,
    int $pad_length,
    string $pad_string = ' ',
    $pad_type = \STR_PAD_RIGHT,
    string $encoding = 'UTF-8'
): string {
    if ($pad_string === '' || $pad_length === 0) {
        return $str;
    }

    // translate the textual pad types into the native constants
    if ($pad_type !== (int) $pad_type) {
        $namedTypes = [
            'left'  => \STR_PAD_LEFT,
            'right' => \STR_PAD_RIGHT,
            'both'  => \STR_PAD_BOTH,
        ];

        if (!\is_string($pad_type) || !isset($namedTypes[$pad_type])) {
            throw new \InvalidArgumentException(
                'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
            );
        }

        $pad_type = $namedTypes[$pad_type];
    }

    // 'UTF-8' is served by the native mb_* functions, everything else by the
    // class' own helpers with the normalized encoding name
    $isUtf8 = $encoding === 'UTF-8';
    if (!$isUtf8) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $measure = static function (string $text) use ($isUtf8, $encoding): int {
        return $isUtf8
            ? (int) \mb_strlen($text)
            : (int) self::strlen($text, $encoding);
    };

    $take = static function (string $text, int $chars) use ($isUtf8, $encoding): string {
        return $isUtf8
            ? (string) \mb_substr($text, 0, $chars)
            : (string) self::substr($text, 0, $chars, $encoding);
    };

    $str_length = $measure($str);
    if ($pad_length < $str_length) {
        return $str;
    }

    $missing = $pad_length - $str_length;

    if ($pad_type === \STR_PAD_BOTH) {
        $leftChars = (int) \floor($missing / 2);
        $rightChars = (int) \ceil($missing / 2);

        return $take(\str_repeat($pad_string, $leftChars), $leftChars)
               . $str
               . $take(\str_repeat($pad_string, $rightChars), $rightChars);
    }

    // one-sided padding: repeat the pad string often enough, then trim to size
    $repeats = (int) \ceil($missing / $measure($pad_string));
    $filler = $take(\str_repeat($pad_string, $repeats), $missing);

    // any integer other than STR_PAD_LEFT / STR_PAD_BOTH pads on the right
    return $pad_type === \STR_PAD_LEFT
        ? $filler . $str
        : $str . $filler;
}
6728
|
|
|
|
6729
|
|
|
/**
 * Returns a new string of a given length such that both sides of the
 * string are padded. Shortcut for self::str_pad() with STR_PAD_BOTH.
 *
 * @param string $str
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string string with padding applied
 */
public static function str_pad_both(
    string $str,
    int $length,
    string $padStr = ' ',
    string $encoding = 'UTF-8'
): string {
    $padType = \STR_PAD_BOTH;

    return self::str_pad($str, $length, $padStr, $padType, $encoding);
}
6748
|
|
|
|
6749
|
|
|
/**
 * Returns a new string of a given length such that the beginning of the
 * string is padded. Shortcut for self::str_pad() with STR_PAD_LEFT.
 *
 * @param string $str
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string string with left padding
 */
public static function str_pad_left(
    string $str,
    int $length,
    string $padStr = ' ',
    string $encoding = 'UTF-8'
): string {
    $padType = \STR_PAD_LEFT;

    return self::str_pad($str, $length, $padStr, $padType, $encoding);
}
6768
|
|
|
|
6769
|
|
|
/**
 * Returns a new string of a given length such that the end of the string
 * is padded. Shortcut for self::str_pad() with STR_PAD_RIGHT.
 *
 * @param string $str
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string string with right padding
 */
public static function str_pad_right(
    string $str,
    int $length,
    string $padStr = ' ',
    string $encoding = 'UTF-8'
): string {
    $padType = \STR_PAD_RIGHT;

    return self::str_pad($str, $length, $padStr, $padType, $encoding);
}
6788
|
|
|
|
6789
|
|
|
/**
 * Repeat a string.
 *
 * The input is passed through self::filter() first, the result is then
 * repeated with the native str_repeat().
 *
 * @param string $str        <p>
 *                           The string to be repeated.
 *                           </p>
 * @param int    $multiplier <p>
 *                           Number of time the input string should be
 *                           repeated.
 *                           </p>
 *                           <p>
 *                           multiplier has to be greater than or equal to 0.
 *                           If the multiplier is set to 0, the function
 *                           will return an empty string.
 *                           </p>
 *
 * @return string the repeated string
 */
public static function str_repeat(string $str, int $multiplier): string
{
    return \str_repeat(self::filter($str), $multiplier);
}
6813
|
|
|
|
6814
|
|
|
/**
 * INFO: This is only a wrapper for "str_replace()" -> the original functions is already UTF-8 safe.
 *
 * Replace all occurrences of the search string with the replacement string
 *
 * @see http://php.net/manual/en/function.str-replace.php
 *
 * @param mixed $search  <p>
 *                       The value being searched for, otherwise known as the needle.
 *                       An array may be used to designate multiple needles.
 *                       </p>
 * @param mixed $replace <p>
 *                       The replacement value that replaces found search
 *                       values. An array may be used to designate multiple replacements.
 *                       </p>
 * @param mixed $subject <p>
 *                       The string or array being searched and replaced on,
 *                       otherwise known as the haystack.
 *                       </p>
 *                       <p>
 *                       If subject is an array, then the search and
 *                       replace is performed with every entry of
 *                       subject, and the return value is an array as
 *                       well.
 *                       </p>
 * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles
 *
 * @return mixed this function returns a string or an array with the replaced values
 */
public static function str_replace(
    $search,
    $replace,
    $subject,
    int &$count = null
) {
    /**
     * @psalm-suppress PossiblyNullArgument
     */
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
}
6859
|
|
|
|
6860
|
|
|
/**
 * Replaces $search from the beginning of string with $replacement.
 *
 * Only a match at the very start of $str is replaced. An empty $search
 * never matches; in that case $replacement is appended to $str.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @return string string after the replacements
 */
public static function str_replace_beginning(string $str, string $search, string $replacement): string
{
    if ($str === '' && $replacement === '') {
        return '';
    }

    // also covers the empty input string, where the result is just $replacement
    if ($search === '') {
        return $str . $replacement;
    }

    $prefixLength = \strlen($search);

    // byte-wise comparison of the leading bytes only
    if (\strncmp($str, $search, $prefixLength) !== 0) {
        return $str;
    }

    return $replacement . \substr($str, $prefixLength);
}
6891
|
|
|
|
6892
|
|
|
/**
 * Replaces $search from the ending of string with $replacement.
 *
 * Only a match at the very end of $str is replaced. An empty $search never
 * matches; in that case $replacement is appended to $str. A $search that is
 * longer than $str cannot match and leaves $str untouched.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @return string string after the replacements
 */
public static function str_replace_ending(string $str, string $search, string $replacement): string
{
    if ($str === '') {
        if ($replacement === '') {
            return '';
        }

        if ($search === '') {
            return $replacement;
        }
    }

    if ($search === '') {
        return $str . $replacement;
    }

    $searchLength = \strlen($search);

    // A longer needle can never be a suffix. Bail out before any offset
    // arithmetic: an offset outside of the string makes strpos() raise a
    // ValueError on PHP 8 (and a warning on PHP 7).
    if ($searchLength > \strlen($str)) {
        return $str;
    }

    // byte-wise comparison of the trailing bytes only
    if (\substr($str, -$searchLength) === $search) {
        return ((string) \substr($str, 0, -$searchLength)) . $replacement;
    }

    return $str;
}
6923
|
|
|
|
6924
|
|
|
/**
 * Replaces only the first occurrence of $search in $subject with $replace.
 *
 * @param string $search  <p>The term to look for.</p>
 * @param string $replace <p>The term to put in its place.</p>
 * @param string $subject <p>The string that is searched.</p>
 *
 * @return string $subject with the first hit replaced, or unchanged when
 *                self::strpos() reports no hit
 *
 * @psalm-suppress InvalidReturnType
 */
public static function str_replace_first(string $search, string $replace, string $subject): string
{
    $position = self::strpos($subject, $search);

    // Nothing found: hand the subject back untouched.
    if ($position === false) {
        return $subject;
    }

    $searchLength = (int) self::strlen($search);

    /**
     * @psalm-suppress InvalidReturnStatement
     */
    return self::substr_replace($subject, $replace, $position, $searchLength);
}
6953
|
|
|
|
6954
|
|
|
/**
 * Replaces only the last occurrence of $search in $subject with $replace.
 *
 * @param string $search  <p>The term to look for.</p>
 * @param string $replace <p>The term to put in its place.</p>
 * @param string $subject <p>The string that is searched.</p>
 *
 * @return string $subject with the last hit replaced, or unchanged when
 *                self::strrpos() reports no hit
 *
 * @psalm-suppress InvalidReturnType
 */
public static function str_replace_last(
    string $search,
    string $replace,
    string $subject
): string {
    $position = self::strrpos($subject, $search);

    // Nothing found: hand the subject back untouched.
    if ($position === false) {
        return $subject;
    }

    $searchLength = (int) self::strlen($search);

    /**
     * @psalm-suppress InvalidReturnStatement
     */
    return self::substr_replace($subject, $replace, $position, $searchLength);
}
6985
|
|
|
|
6986
|
|
|
/**
 * Shuffles all the characters in the string.
 *
 * PS: uses random algorithm which is weak for cryptography purposes
 *
 * @param string $str      <p>The input string</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string the shuffled string
 */
public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
{
    // Nothing to shuffle; also avoids range(0, -1), which would yield the
    // bogus index list [0, -1].
    if ($str === '') {
        return '';
    }

    // Decide on the fast path before the encoding name gets normalized.
    $isUtf8 = $encoding === 'UTF-8';
    if ($isUtf8 === false) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $length = $isUtf8
        ? (int) \mb_strlen($str)
        : (int) self::strlen($str, $encoding);

    // Shuffle the character positions, then rebuild the string in that order.
    $indexes = \range(0, $length - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    $shuffledStr = '';
    foreach ($indexes as $index) {
        $char = $isUtf8
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        // A failed extraction (false) is skipped.
        if ($char !== false) {
            $shuffledStr .= $char;
        }
    }

    return $shuffledStr;
}
7032
|
|
|
|
7033
|
|
|
/**
 * Returns the substring beginning at $start, and up to, but not including
 * the index specified by $end. If $end is omitted, the function extracts
 * the remaining string. Negative $start / $end values are counted from the
 * end of the string.
 *
 * @param string $str
 * @param int    $start    <p>Initial index from which to begin extraction.</p>
 * @param int    $end      [optional] <p>Index at which to end extraction. Default: null</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return false|string
 *                      <p>The extracted substring; an empty string when the
 *                      resolved $end is not after the resolved $start.</p>
 *                      <p>For non UTF-8 encodings the result of self::substr()
 *                      is returned as-is, which may be <b>FALSE</b>.</p>
 */
public static function str_slice(
    string $str,
    int $start,
    int $end = null,
    string $encoding = 'UTF-8'
) {
    // Decide on the fast path before the encoding name gets normalized.
    $isUtf8 = $encoding === 'UTF-8';
    if ($isUtf8 === false) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $strLength = $isUtf8
        ? (int) \mb_strlen($str)
        : (int) self::strlen($str, $encoding);

    if ($end === null) {
        // No end given: take everything from $start onwards.
        return $isUtf8
            ? \mb_substr($str, $start, $strLength)
            : self::substr($str, $start, $strLength, $encoding);
    }

    // Resolve both indexes to absolute positions first; otherwise
    // "$end - $start" over-counts for a negative $start, and a strongly
    // negative $end turns into a negative length.
    if ($start < 0) {
        $start = \max(0, $strLength + $start);
    }
    if ($end < 0) {
        $end += $strLength;
    }

    if ($end <= $start) {
        return '';
    }

    $length = $end - $start;

    return $isUtf8
        ? \mb_substr($str, $start, $length)
        : self::substr($str, $start, $length, $encoding);
}
7082
|
|
|
|
7083
|
|
|
/**
 * Convert a string to e.g.: "snake_case"
 *
 * Steps: whitespace is normalized via self::normalize_whitespace(), "-" becomes
 * "_", every upper-case letter is lower-cased and prefixed with "_", every
 * plain decimal digit is wrapped in "_", whitespace runs become "_", repeated
 * "_" are collapsed and leading/trailing "_" and whitespace are trimmed.
 *
 * @param string $str
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string string in snake_case
 */
public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    $str = \str_replace(
        '-',
        '_',
        self::normalize_whitespace($str)
    );

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = (string) \preg_replace_callback(
        // Numbers and upper-case letters only. There is deliberately no "|"
        // in the class: inside [...] it is a literal pipe, not an alternation.
        '/([\\p{N}\\p{Lu}])/u',
        /**
         * @param string[] $matches
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            $match = $matches[1];
            $matchInt = (int) $match;

            // A plain decimal digit survives the int round-trip: separate it
            // on both sides.
            if ((string) $matchInt === $match) {
                return '_' . $match . '_';
            }

            if ($encoding === 'UTF-8') {
                return '_' . \mb_strtolower($match);
            }

            return '_' . self::strtolower($match, $encoding);
        },
        $str
    );

    $str = (string) \preg_replace(
        [
            '/\\s+/u',        // convert spaces to "_"
            '/^\\s+|\\s+$/u', // trim leading & trailing spaces
            '/_+/',           // remove double "_"
        ],
        [
            '_',
            '',
            '_',
        ],
        $str
    );

    return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
}
7147
|
|
|
|
7148
|
|
|
/**
 * Orders the characters of a string by their code point value.
 *
 * @param string $str    <p>A UTF-8 string.</p>
 * @param bool   $unique <p>If <strong>true</strong>, each character is kept only once.</p>
 * @param bool   $desc   <p>If <strong>true</strong>, the highest code point comes first.</p>
 *
 * @return string string of sorted characters
 */
public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
{
    $codePoints = self::codepoints($str);

    if ($unique === true) {
        // Flipping twice drops duplicate values.
        $flipped = \array_flip($codePoints);
        $codePoints = \array_flip($flipped);
    }

    if ($desc === true) {
        \arsort($codePoints);
    } else {
        \asort($codePoints);
    }

    return self::string($codePoints);
}
7173
|
|
|
|
7174
|
|
|
/**
 * Convert a string to an array of Unicode characters.
 *
 * When an array is passed, every element is split recursively and the array
 * (with its keys) is returned.
 *
 * @param int|int[]|string|string[] $str                <p>The string to split into array.</p>
 * @param int                       $length             [optional] <p>Max character length of each array
 *                                                      element.</p>
 * @param bool                      $cleanUtf8          [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param bool                      $tryToUseMbFunction [optional] <p>Set to false, if you don't want to use
 *                                                      "mb_substr"</p>
 *
 * @return array
 *               <p>An array containing chunks of the input; an empty array for
 *               an empty input or a $length below 1.</p>
 */
public static function str_split(
    $str,
    int $length = 1,
    bool $cleanUtf8 = false,
    bool $tryToUseMbFunction = true
): array {
    if ($length <= 0) {
        return [];
    }

    if (\is_array($str) === true) {
        // Split every element, keeping the original keys.
        foreach ($str as $key => $value) {
            $str[$key] = self::str_split(
                $value,
                $length,
                $cleanUtf8,
                $tryToUseMbFunction
            );
        }

        return $str;
    }

    // init
    $str = (string) $str;

    if ($str === '') {
        return [];
    }

    if ($cleanUtf8 === true) {
        $str = self::clean($str);
    }

    if (
        $tryToUseMbFunction === true
        &&
        self::$SUPPORT['mbstring'] === true
    ) {
        $iMax = \mb_strlen($str);
        if ($iMax <= 127) {
            // Short strings: extract character by character.
            $ret = [];
            for ($i = 0; $i < $iMax; ++$i) {
                $ret[] = \mb_substr($str, $i, 1);
            }
        } else {
            // Longer strings: one regex pass.
            $retArray = [];
            \preg_match_all('/./us', $str, $retArray);
            $ret = $retArray[0] ?? [];
        }
    } elseif (self::$SUPPORT['pcre_utf8'] === true) {
        $retArray = [];
        \preg_match_all('/./us', $str, $retArray);
        $ret = $retArray[0] ?? [];
    } else {

        // fallback: decode the UTF-8 byte sequences by hand; bytes that do
        // not form a complete 1-4 byte sequence are not added to the result

        $ret = [];
        $len = \strlen($str);

        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            if (($str[$i] & "\x80") === "\x00") {
                // 0xxxxxxx: single byte
                $ret[] = $str[$i];
            } elseif (
                isset($str[$i + 1])
                &&
                ($str[$i] & "\xE0") === "\xC0"
            ) {
                // 110xxxxx 10xxxxxx: two bytes
                if (($str[$i + 1] & "\xC0") === "\x80") {
                    $ret[] = $str[$i] . $str[$i + 1];

                    ++$i;
                }
            } elseif (
                isset($str[$i + 2])
                &&
                ($str[$i] & "\xF0") === "\xE0"
            ) {
                // 1110xxxx 10xxxxxx 10xxxxxx: three bytes
                if (
                    ($str[$i + 1] & "\xC0") === "\x80"
                    &&
                    ($str[$i + 2] & "\xC0") === "\x80"
                ) {
                    $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                    $i += 2;
                }
            } elseif (
                isset($str[$i + 3])
                &&
                ($str[$i] & "\xF8") === "\xF0"
            ) {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four bytes
                if (
                    ($str[$i + 1] & "\xC0") === "\x80"
                    &&
                    ($str[$i + 2] & "\xC0") === "\x80"
                    &&
                    ($str[$i + 3] & "\xC0") === "\x80"
                ) {
                    $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                    $i += 3;
                }
            }
        }
    }

    if ($length > 1) {
        $ret = \array_chunk($ret, $length);

        // The callback takes its argument by value: array_map() passes values,
        // and a by-reference parameter triggers a warning on PHP 8.
        return \array_map(
            static function (array $item): string {
                return \implode('', $item);
            },
            $ret
        );
    }

    if (isset($ret[0]) && $ret[0] === '') {
        return [];
    }

    return $ret;
}
7313
|
|
|
|
7314
|
|
|
/**
 * Splits the string on the given pattern and returns the pieces as plain
 * strings. An optional $limit caps the number of returned pieces.
 *
 * NOTE(review): with mbstring available the pattern is handed to mb_split()
 * as a regular expression, while the PCRE fallback runs it through
 * preg_quote() and therefore matches it literally - confirm which of the two
 * is intended.
 *
 * @param string $str
 * @param string $pattern <p>The regex with which to split the string.</p>
 * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
 *
 * @return string[] an array of strings
 */
public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
{
    if ($limit === 0) {
        return [];
    }

    if ($pattern === '') {
        return [$str];
    }

    if (self::$SUPPORT['mbstring'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $pieces = \mb_split($pattern, $str);

        // Negative limit: no cap, return everything.
        if ($limit < 0) {
            return $pieces;
        }

        // Keep only the first $limit pieces.
        $result = [];
        foreach ($pieces as $piece) {
            if (\count($result) === $limit) {
                break;
            }

            $result[] = $piece;
        }

        return $result;
    }

    // preg_split() puts the unsplit remainder into the last slot, so ask for
    // one extra piece and drop it afterwards.
    $pregLimit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $pregLimit);

    if ($parts === false) {
        return [];
    }

    if ($pregLimit > 0 && \count($parts) === $pregLimit) {
        \array_pop($parts);
    }

    return $parts;
}
7375
|
|
|
|
7376
|
|
|
/**
 * Tells whether $haystack begins with $needle (byte-wise, case-sensitive).
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return bool true for an empty $needle or a matching prefix, false otherwise
 */
public static function str_starts_with(string $haystack, string $needle): bool
{
    // Every string starts with the empty string.
    if ($needle === '') {
        return true;
    }

    // Compares only the first strlen($needle) bytes; an empty or too short
    // haystack compares as unequal.
    return \strncmp($haystack, $needle, \strlen($needle)) === 0;
}
7396
|
|
|
|
7397
|
|
|
/**
 * Tells whether the string begins with at least one of the given substrings.
 *
 * - case-sensitive
 *
 * @param string $str        <p>The input string.</p>
 * @param array  $substrings <p>Substrings to look for.</p>
 *
 * @return bool true as soon as one substring matches; false for an empty
 *              $str, an empty list or no match
 */
public static function str_starts_with_any(string $str, array $substrings): bool
{
    if ($str === '' || $substrings === []) {
        return false;
    }

    foreach ($substrings as $candidate) {
        if (self::str_starts_with($str, $candidate)) {
            return true;
        }
    }

    return false;
}
7425
|
|
|
|
7426
|
|
|
/**
 * Gets the substring after the first occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string the text following the first separator; an empty string when
 *                $str or $separator is empty or the separator is not found
 */
public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($separator === '' || $str === '') {
        return '';
    }

    if ($encoding === 'UTF-8') {
        $offset = \mb_strpos($str, $separator);
        if ($offset === false) {
            return '';
        }

        return (string) \mb_substr(
            $str,
            $offset + (int) \mb_strlen($separator)
        );
    }

    $offset = self::strpos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    // Go through self::substr(), like the sibling separator helpers do,
    // instead of handing the raw encoding name to mb_substr().
    return (string) self::substr(
        $str,
        $offset + (int) self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
7465
|
|
|
|
7466
|
|
|
/**
 * Returns everything that follows the last occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string the text following the last separator; an empty string when
 *                $str or $separator is empty or the separator is not found
 */
public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $separator === '') {
        return '';
    }

    if ($encoding !== 'UTF-8') {
        $position = self::strrpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        $from = $position + (int) self::strlen($separator, $encoding);

        return (string) self::substr($str, $from, null, $encoding);
    }

    // UTF-8: use the native mbstring functions directly.
    $position = \mb_strrpos($str, $separator);
    if ($position === false) {
        return '';
    }

    $from = $position + (int) \mb_strlen($separator);

    return (string) \mb_substr($str, $from);
}
7505
|
|
|
|
7506
|
|
|
/**
 * Returns everything that precedes the first occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string the text in front of the first separator; an empty string when
 *                $str or $separator is empty or the separator is not found
 */
public static function str_substr_before_first_separator(
    string $str,
    string $separator,
    string $encoding = 'UTF-8'
): string {
    if ($str === '' || $separator === '') {
        return '';
    }

    if ($encoding !== 'UTF-8') {
        $position = self::strpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        return (string) self::substr($str, 0, $position, $encoding);
    }

    // UTF-8: use the native mbstring functions directly.
    $position = \mb_strpos($str, $separator);
    if ($position === false) {
        return '';
    }

    return (string) \mb_substr($str, 0, $position);
}
7549
|
|
|
|
7550
|
|
|
/**
 * Returns everything that precedes the last occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string the text in front of the last separator; an empty string when
 *                $str or $separator is empty or the separator is not found
 */
public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $separator === '') {
        return '';
    }

    if ($encoding !== 'UTF-8') {
        $position = self::strrpos($str, $separator, 0, $encoding);
        if ($position === false) {
            return '';
        }

        // The encoding name is normalized only after the lookup.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr($str, 0, $position, $encoding);
    }

    // UTF-8: use the native mbstring functions directly.
    $position = \mb_strrpos($str, $separator);
    if ($position === false) {
        return '';
    }

    return (string) \mb_substr($str, 0, $position);
}
7592
|
|
|
|
7593
|
|
|
/**
 * Returns the part of the string starting at (and including) the first
 * occurrence of "$needle", or the part in front of it when "$beforeNeedle"
 * is set.
 *
 * @param string $str          <p>The input string.</p>
 * @param string $needle       <p>The string to look for.</p>
 * @param bool   $beforeNeedle [optional] <p>Default: false</p>
 * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string the requested part; an empty string when $str or $needle is
 *                empty or the needle is not found
 */
public static function str_substr_first(
    string $str,
    string $needle,
    bool $beforeNeedle = false,
    string $encoding = 'UTF-8'
): string {
    if ($needle === '' || $str === '') {
        return '';
    }

    $part = $encoding === 'UTF-8'
        ? \mb_strstr($str, $needle, $beforeNeedle)
        : self::strstr($str, $needle, $beforeNeedle, $encoding);

    if ($part === false) {
        return '';
    }

    return $part;
}
7637
|
|
|
|
7638
|
|
|
/**
 * Returns the part of the string starting at (and including) the last
 * occurrence of "$needle", or the part in front of it when "$beforeNeedle"
 * is set.
 *
 * @param string $str          <p>The input string.</p>
 * @param string $needle       <p>The string to look for.</p>
 * @param bool   $beforeNeedle [optional] <p>Default: false</p>
 * @param string $encoding     [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string the requested part; an empty string when $str or $needle is
 *                empty or the needle is not found
 */
public static function str_substr_last(
    string $str,
    string $needle,
    bool $beforeNeedle = false,
    string $encoding = 'UTF-8'
): string {
    if ($needle === '' || $str === '') {
        return '';
    }

    $part = $encoding === 'UTF-8'
        ? \mb_strrchr($str, $needle, $beforeNeedle)
        : self::strrchr($str, $needle, $beforeNeedle, $encoding);

    if ($part === false) {
        return '';
    }

    return $part;
}
7682
|
|
|
|
7683
|
|
|
/**
 * Wraps $str in $substring: the substring is placed both in front of and
 * behind the input.
 *
 * @param string $str
 * @param string $substring <p>The substring to add to both sides.</p>
 *
 * @return string string with the substring both prepended and appended
 */
public static function str_surround(string $str, string $substring): string
{
    return \implode('', [$substring, $str, $substring]);
}
7695
|
|
|
|
7696
|
|
|
/**
 * Capitalizes the first letter of every whitespace-separated word and
 * lower-cases the rest of it. Words listed in $ignore are left exactly as
 * they are. The input is trimmed first unless $useTrimFirst is disabled.
 *
 * @param string              $str
 * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
 *                                                   Default: null</p>
 * @param string              $encoding              [optional] <p>Default: 'UTF-8'</p>
 * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
 *                                                   ß</p>
 * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
 *
 * @return string the titleized string
 */
public static function str_titleize(
    string $str,
    array $ignore = null,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false,
    bool $useTrimFirst = true
): string {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($useTrimFirst === true) {
        $str = \trim($str);
    }

    if ($cleanUtf8 === true) {
        $str = self::clean($str);
    }

    // The plain mb_* functions are used only when no language-specific
    // handling and no length-preserving conversion was requested.
    $useMbFunction = $lang === null && $tryToKeepStringLength === false;

    $titleizeWord = static function (array $match) use ($tryToKeepStringLength, $lang, $ignore, $useMbFunction, $encoding): string {
        $word = $match[0];

        if ($ignore !== null && \in_array($word, $ignore, true)) {
            return $word;
        }

        if ($useMbFunction !== true) {
            $lowered = self::strtolower(
                $word,
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );

            return self::ucfirst(
                $lowered,
                $encoding,
                false,
                $lang,
                $tryToKeepStringLength
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_strtoupper(\mb_substr($word, 0, 1))
                   . \mb_strtolower(\mb_substr($word, 1));
        }

        return \mb_strtoupper(\mb_substr($word, 0, 1, $encoding), $encoding)
               . \mb_strtolower(\mb_substr($word, 1, null, $encoding), $encoding);
    };

    // Every run of non-whitespace characters counts as one word.
    return (string) \preg_replace_callback('/([^\\s]+)/u', $titleizeWord, $str);
}
7771
|
|
|
|
7772
|
|
|
/** |
7773
|
|
|
* Returns a trimmed string in proper title case. |
7774
|
|
|
* |
7775
|
|
|
* Also accepts an array, $ignore, allowing you to list words not to be |
7776
|
|
|
* capitalized. |
7777
|
|
|
* |
7778
|
|
|
* Adapted from John Gruber's script. |
7779
|
|
|
* |
7780
|
|
|
* @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78 |
7781
|
|
|
* |
7782
|
|
|
* @param string $str |
7783
|
|
|
* @param array $ignore <p>An array of words not to capitalize.</p> |
7784
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
7785
|
|
|
* |
7786
|
|
|
* @return string the titleized string |
7787
|
|
|
*/ |
7788
|
35 |
|
public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
{
    // Built-in "small words". Every entry is a regex fragment (not a literal),
    // and the caller-supplied $ignore entries are appended as-is to the same list.
    $builtInSmallWords = [
        '(?<!q&)a',
        'an',
        'and',
        'as',
        'at(?!&t)',
        'but',
        'by',
        'en',
        'for',
        'if',
        'in',
        'of',
        'on',
        'or',
        'the',
        'to',
        'v[.]?',
        'via',
        'vs[.]?',
    ];

    $smallWordsRx = \implode('|', \array_merge($builtInSmallWords, $ignore));
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = \trim($str);

    // Input without any lowercase character is lower-cased before titleizing.
    if (self::has_lowercase($str) === false) {
        $str = self::strtolower($str, $encoding);
    }

    // Shared callback: upper-case the first character of capture group 1.
    $upperFirstOfGroup1 = static function (array $matches) use ($encoding): string {
        return self::str_upper_first($matches[1], $encoding);
    };

    // Shared callback: keep capture group 1, upper-case the first character of group 2.
    $keepGroup1UpperFirstOfGroup2 = static function (array $matches) use ($encoding): string {
        return $matches[1] . self::str_upper_first($matches[2], $encoding);
    };

    // the main substitutions
    $mainRx = '~\\b (_*) (?:                                                   # 1. Leading underscore and
                ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or
                  [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                |
                ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                |
                ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                |
                ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
              ) (_*) \\b                                                          # 6. With trailing underscore
            ~ux';

    $str = (string) \preg_replace_callback(
        $mainRx,
        /**
         * @param string[] $matches
         *
         * @return string
         */
        static function (array $matches) use ($encoding): string {
            if ($matches[2]) {
                // preserve URLs, domains, emails and file paths
                $word = $matches[2];
            } elseif ($matches[3]) {
                // lower-case small words
                $word = self::strtolower($matches[3], $encoding);
            } elseif ($matches[4]) {
                // capitalize word w/o internal caps
                $word = self::str_upper_first($matches[4], $encoding);
            } else {
                // preserve other kinds of word (iPhone)
                $word = $matches[5];
            }

            // leading underscores + processed word + trailing underscores
            return $matches[1] . $word . $matches[6];
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $startOfTitleRx = '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $smallWordsRx . ' ) \\b # ...followed by small word
                     ~uxi';
    $str = (string) \preg_replace_callback($startOfTitleRx, $keepGroup1UpperFirstOfGroup2, $str);

    // ...and end of title
    $endOfTitleRx = '~\\b ( ' . $smallWordsRx . ' ) # small word...
                      (?= [[:punct:]]* \Z     # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )    # ...or of an inserted subphrase?
                     ~uxi';
    $str = (string) \preg_replace_callback($endOfTitleRx, $upperFirstOfGroup1, $str);

    // Exceptions for small words in hyphenated compound words.
    // e.g. "in-flight" -> In-Flight
    $leadingCompoundRx = '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $smallWordsRx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi';
    $str = (string) \preg_replace_callback($leadingCompoundRx, $upperFirstOfGroup1, $str);

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    // NOTE(review): the lookbehind below tests for the character "…" although its
    // inline comment talks about a hyphen - confirm which one is intended.
    $trailingCompoundRx = '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $smallWordsRx . ' ) # ...followed by small word
                      (?!	- )                   # Negative lookahead for another -
                     ~uxi';
    $str = (string) \preg_replace_callback($trailingCompoundRx, $keepGroup1UpperFirstOfGroup2, $str);

    return $str;
}
7941
|
|
|
|
7942
|
|
|
/**
 * Get a binary representation of a specific string.
 *
 * <p>The bytes of the string are read as one big-endian number and that
 * number is written in base 2, without leading zeros (an empty string
 * gives "0").</p>
 *
 * <p>INFO: the conversion is done digit by digit, so the result is exact for
 * any input length ("base_convert()" loses precision on large numbers).</p>
 *
 * @param string $str <p>The input string.</p>
 *
 * @return false|string
 *                      <p>false on error</p>
 */
public static function str_to_binary(string $str)
{
    $unpacked = \unpack('H*', $str);
    if ($unpacked === false) {
        return false;
    }

    /** @noinspection OffsetOperationsInspection */
    $hex = (string) $unpacked[1];

    // Each hex digit maps to exactly four bits; converting digit-wise never
    // leaves the integer range, so no precision can be lost.
    $bits = '';
    $hexLength = \strlen($hex);
    for ($i = 0; $i < $hexLength; ++$i) {
        $bits .= \str_pad(\decbin((int) \hexdec($hex[$i])), 4, '0', \STR_PAD_LEFT);
    }

    // Same shape as a plain base conversion: no leading zeros, "0" for zero.
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
}
7960
|
|
|
|
7961
|
|
|
/**
 * Split a string into its lines.
 *
 * <p>A separator is one or two consecutive "\r" / "\n" characters, so "\r\n"
 * counts as a single break - and so does any other pair such as "\n\n".</p>
 *
 * @param string   $str
 * @param bool     $removeEmptyValues <p>Remove empty values.</p>
 * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
 *
 * @return string[]
 */
public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
{
    // Result used for empty input and for a failed split.
    $nothingFound = $removeEmptyValues === true ? [] : [''];

    if ($str === '') {
        return $nothingFound;
    }

    /** @noinspection PhpComposerExtensionStubsInspection */
    $lines = self::$SUPPORT['mbstring'] === true
        ? \mb_split("[\r\n]{1,2}", $str)
        : \preg_split("/[\r\n]{1,2}/u", $str);

    if ($lines === false) {
        return $nothingFound;
    }

    // No filtering requested: hand back the raw split.
    if ($removeEmptyValues === false && $removeShortValues === null) {
        return $lines;
    }

    return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
}
7999
|
|
|
|
8000
|
|
|
/**
 * Convert a string into an array of words.
 *
 * <p>The string is split with PREG_SPLIT_DELIM_CAPTURE, so the unfiltered
 * result alternates between non-word text (even indexes) and words
 * (odd indexes).</p>
 *
 * @param string   $str
 * @param string   $charList          <p>Additional chars for the definition of "words".</p>
 * @param bool     $removeEmptyValues <p>Remove empty values.</p>
 * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
 *
 * @return string[]
 */
public static function str_to_words(
    string $str,
    string $charList = '',
    bool $removeEmptyValues = false,
    int $removeShortValues = null
): array {
    // Result used for empty input and for a failed split.
    $nothingFound = $removeEmptyValues === true ? [] : [''];

    if ($str === '') {
        return $nothingFound;
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
        return $nothingFound;
    }

    // No filtering requested: hand back the raw split.
    if ($removeEmptyValues === false && $removeShortValues === null) {
        return $parts;
    }

    $reduced = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

    // Make sure every entry is a string, keeping the keys as they are.
    $words = [];
    foreach ($reduced as $key => $value) {
        $words[$key] = (string) $value;
    }

    return $words;
}
8047
|
|
|
|
8048
|
|
|
/**
 * alias for "UTF8::to_ascii()"
 *
 * <p>All three arguments are forwarded unchanged.</p>
 *
 * @param string $str
 * @param string $unknown
 * @param bool   $strict
 *
 * @return string
 *
 * @see UTF8::to_ascii()
 */
public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
{
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
}
8063
|
|
|
|
8064
|
|
|
/**
 * Truncates the string to a given length. If $substring is provided, and
 * truncating occurs, the string is further truncated so that the substring
 * may be appended without exceeding the desired length.
 *
 * <p>A string that is not longer than $length is returned unchanged
 * (without $substring).</p>
 *
 * @param string $str
 * @param int    $length    <p>Desired length of the truncated string.</p>
 * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @return string string after truncating
 */
public static function str_truncate(
    string $str,
    int $length,
    string $substring = '',
    string $encoding = 'UTF-8'
): string {
    if ($str === '') {
        return '';
    }

    // Fast path: plain "mb_*" calls for UTF-8.
    if ($encoding === 'UTF-8') {
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        if ($substring === '') {
            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length);
        }

        // reserve room for the appended substring
        $length -= (int) \mb_strlen($substring);

        /** @noinspection UnnecessaryCastingInspection */
        return (string) \mb_substr($str, 0, $length) . $substring;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ((int) self::strlen($str, $encoding) <= $length) {
        return $str;
    }

    if ($substring !== '') {
        // reserve room for the appended substring
        $length -= (int) self::strlen($substring, $encoding);
    }

    $head = (string) self::substr($str, 0, $length, $encoding);

    return $head . $substring;
}
8121
|
|
|
|
8122
|
|
|
/**
 * Truncates the string to a given length, while ensuring that it does not
 * split words. If $substring is provided, and truncating occurs, the
 * string is further truncated so that the substring may be appended without
 * exceeding the desired length.
 *
 * <p>For an empty $str or a $length <= 0 only $substring is returned; the
 * same happens when no room is left once $substring is accounted for.</p>
 *
 * @param string $str
 * @param int    $length                          <p>Desired length of the truncated string.</p>
 * @param string $substring                       [optional] <p>The substring to append if it can fit. Default:
 *                                                ''</p>
 * @param string $encoding                        [optional] <p>Default: 'UTF-8'</p>
 * @param bool   $ignoreDoNotSplitWordsForOneWord [optional] <p>Default: false</p>
 *
 * @return string string after truncating
 */
public static function str_truncate_safe(
    string $str,
    int $length,
    string $substring = '',
    string $encoding = 'UTF-8',
    bool $ignoreDoNotSplitWordsForOneWord = false
): string {
    if ($str === '' || $length <= 0) {
        return $substring;
    }

    // Fast path: plain "mb_*" calls for UTF-8.
    if ($encoding === 'UTF-8') {
        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        // need to further trim the string so we can append the substring
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        $head = \mb_substr($str, 0, $length);
        if ($head === false) {
            return '';
        }

        // The cut is clean only if the first space found from ($length - 1)
        // onwards sits exactly at index $length; otherwise a word was split.
        $spaceAtCut = \mb_strpos($str, ' ', $length - 1);
        if ($spaceAtCut !== $length) {
            // find pos of the last occurrence of a space, get up to that
            $lastSpace = \mb_strrpos($head, ' ', 0);

            $cutBack = $lastSpace !== false
                       ||
                       ($spaceAtCut !== false && $ignoreDoNotSplitWordsForOneWord === false);

            if ($cutBack) {
                $head = (string) \mb_substr($head, 0, (int) $lastSpace);
            }
        }

        return $head . $substring;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ($length >= (int) self::strlen($str, $encoding)) {
        return $str;
    }

    // need to further trim the string so we can append the substring
    $length -= (int) self::strlen($substring, $encoding);
    if ($length <= 0) {
        return $substring;
    }

    $head = self::substr($str, 0, $length, $encoding);
    if ($head === false) {
        return '';
    }

    // same word-boundary check as above, via the encoding-aware helpers
    $spaceAtCut = self::strpos($str, ' ', $length - 1, $encoding);
    if ($spaceAtCut !== $length) {
        // find pos of the last occurrence of a space, get up to that
        $lastSpace = self::strrpos($head, ' ', 0, $encoding);

        $cutBack = $lastSpace !== false
                   ||
                   ($spaceAtCut !== false && $ignoreDoNotSplitWordsForOneWord === false);

        if ($cutBack) {
            $head = (string) self::substr($head, 0, (int) $lastSpace, $encoding);
        }
    }

    return $head . $substring;
}
8216
|
|
|
|
8217
|
|
|
/**
 * Returns a lowercase and trimmed string separated by underscores.
 * Underscores are inserted before uppercase characters (with the exception
 * of the first character of the string), and in place of spaces as well as
 * dashes.
 *
 * <p>Implemented as "UTF8::str_delimit()" with "_" as delimiter.</p>
 *
 * @param string $str
 *
 * @return string the underscored string
 */
public static function str_underscored(string $str): string
{
    $underscored = self::str_delimit($str, '_');

    return $underscored;
}
8231
|
|
|
|
8232
|
|
|
/**
 * Returns an UpperCamelCase version of the supplied string. It trims
 * surrounding spaces, capitalizes letters following digits, spaces, dashes
 * and underscores, and removes spaces, dashes, underscores.
 *
 * <p>Implemented as "UTF8::str_camelize()" followed by "UTF8::ucfirst()".</p>
 *
 * @param string      $str                   <p>The input string.</p>
 * @param string      $encoding              [optional] <p>Default: 'UTF-8'</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string string in UpperCamelCase
 */
public static function str_upper_camelize(
    string $str,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    $camelized = self::str_camelize($str, $encoding);

    return self::ucfirst($camelized, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
}
8254
|
|
|
|
8255
|
|
|
/**
 * alias for "UTF8::ucfirst()"
 *
 * <p>All arguments are forwarded unchanged.</p>
 *
 * @param string      $str
 * @param string      $encoding
 * @param bool        $cleanUtf8
 * @param string|null $lang
 * @param bool        $tryToKeepStringLength
 *
 * @return string
 *
 * @see UTF8::ucfirst()
 */
public static function str_upper_first(
    string $str,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    $upperFirst = self::ucfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $upperFirst;
}
8277
|
|
|
|
8278
|
|
|
/**
 * Counts number of words in the UTF-8 string.
 *
 * <p>Works on the result of "UTF8::str_to_words()", where the words sit on
 * the odd indexes and the text between them on the even ones.</p>
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $format   [optional] <p>
 *                         <strong>0</strong> => return a number of words (default)<br>
 *                         <strong>1</strong> => return an array of words<br>
 *                         <strong>2</strong> => return an array of words with word-offset as key
 *                         </p>
 * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
 *
 * @return int|string[] The number of words in the string
 */
public static function str_word_count(string $str, int $format = 0, string $charlist = '')
{
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    // format 1: plain list of words
    if ($format === 1) {
        $words = [];
        for ($index = 1; $index < $partCount; $index += 2) {
            $words[] = $parts[$index];
        }

        return $words;
    }

    // format 2: words keyed by their character offset
    if ($format === 2) {
        $words = [];
        $position = (int) self::strlen($parts[0]);
        for ($index = 1; $index < $partCount; $index += 2) {
            $words[$position] = $parts[$index];
            // skip the word itself and the text that follows it
            $position += (int) self::strlen($parts[$index]) + (int) self::strlen($parts[$index + 1]);
        }

        return $words;
    }

    // any other format: number of words only
    return (int) (($partCount - 1) / 2);
}
8315
|
|
|
|
8316
|
|
|
/**
 * Case-insensitive string comparison.
 *
 * INFO: Case-insensitive version of UTF8::strcmp()
 *
 * <p>Both strings are passed through "UTF8::strtocasefold()" and then
 * compared with "UTF8::strcmp()".</p>
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
 *             <strong>0</strong> if they are equal
 */
public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
{
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
}
8337
|
|
|
|
8338
|
|
|
/**
 * alias for "UTF8::strstr()"
 *
 * <p>All arguments are forwarded unchanged.</p>
 *
 * @param string $haystack
 * @param string $needle
 * @param bool   $before_needle
 * @param string $encoding
 * @param bool   $cleanUtf8
 *
 * @return false|string
 *
 * @see UTF8::strstr()
 */
public static function strchr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
}
8360
|
|
|
|
8361
|
|
|
/**
 * Case-sensitive string comparison.
 *
 * <p>Identical strings compare as equal right away; otherwise both strings
 * are normalized to NFD and compared byte-wise. A string that cannot be
 * normalized (e.g. invalid UTF-8) is compared in its original form.</p>
 *
 * @param string $str1 <p>The first string.</p>
 * @param string $str2 <p>The second string.</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
 *             <strong>0</strong> if they are equal
 */
public static function strcmp(string $str1, string $str2): int
{
    if ($str1 === $str2) {
        return 0;
    }

    // "Normalizer::normalize()" does not always return a string (false on
    // failure); passing that on to "\strcmp()" would raise a TypeError under
    // strict types, so fall back to the raw input instead.
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
}
8383
|
|
|
|
8384
|
|
|
/**
 * Find length of initial segment not matching mask.
 *
 * <p>When $offset and/or $length is given, only that part of $str is
 * examined. An empty $charList gives the length of the whole $str.</p>
 *
 * @param string $str
 * @param string $charList
 * @param int    $offset
 * @param int    $length
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int
 */
public static function strcspn(
    string $str,
    string $charList,
    int $offset = null,
    int $length = null,
    string $encoding = 'UTF-8'
): int {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // nothing to look for: the whole string is the "segment"
    if ($charList === '') {
        return (int) self::strlen($str, $encoding);
    }

    // restrict the search to the requested window
    if ($offset !== null || $length !== null) {
        if ($encoding !== 'UTF-8') {
            /** @noinspection UnnecessaryCastingInspection */
            $window = self::substr($str, (int) $offset, $length, $encoding);
        } elseif ($length === null) {
            /** @noinspection UnnecessaryCastingInspection */
            $window = \mb_substr($str, (int) $offset);
        } else {
            /** @noinspection UnnecessaryCastingInspection */
            $window = \mb_substr($str, (int) $offset, $length);
        }

        if ($window === false) {
            return 0;
        }

        $str = $window;
    }

    if ($str === '') {
        return 0;
    }

    // capture (lazily) everything in front of the first char from the mask
    $found = [];
    if (!\preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $found)) {
        // no char of the mask in the string at all
        return (int) self::strlen($str, $encoding);
    }

    $segmentLength = self::strlen($found[1], $encoding);

    return $segmentLength === false ? 0 : $segmentLength;
}
8447
|
|
|
|
8448
|
|
|
/**
 * alias for "UTF8::stristr()"
 *
 * <p>All arguments are forwarded unchanged.</p>
 *
 * @param string $haystack
 * @param string $needle
 * @param bool   $before_needle
 * @param string $encoding
 * @param bool   $cleanUtf8
 *
 * @return false|string
 *
 * @see UTF8::stristr()
 */
public static function strichr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
}
8470
|
|
|
|
8471
|
|
|
/**
 * Create a UTF-8 string from code points.
 *
 * INFO: opposite to UTF8::codepoints()
 *
 * <p>Every entry is converted with "UTF8::chr()" and the results are
 * concatenated in array order.</p>
 *
 * @param array $array <p>Integer or Hexadecimal codepoints.</p>
 *
 * @return string UTF-8 encoded string
 */
public static function string(array $array): string
{
    $chars = \array_map(
        [
            self::class,
            'chr',
        ],
        $array
    );

    return \implode('', $chars);
}
8493
|
|
|
|
8494
|
|
|
/**
 * Checks if string starts with "BOM" (Byte Order Mark Character) character.
 *
 * <p>The string is compared against every key of the internal BOM table;
 * only the first bytes of the string are looked at.</p>
 *
 * @param string $str <p>The input string.</p>
 *
 * @return bool
 *              <strong>true</strong> if the string has BOM at the start,<br>
 *              <strong>false</strong> otherwise
 */
public static function string_has_bom(string $str): bool
{
    // Only the keys (the BOM byte sequences) are needed here. Iterating the
    // static table by value avoids binding a reference into it.
    foreach (\array_keys(self::$BOM) as $bom) {
        $bom = (string) $bom;

        // prefix comparison: no need to scan the rest of the string
        if (\strncmp($str, $bom, \strlen($bom)) === 0) {
            return true;
        }
    }

    return false;
}
8514
|
|
|
|
8515
|
|
|
/**
 * Strip HTML and PHP tags from a string + clean invalid UTF-8.
 *
 * @see http://php.net/manual/en/function.strip-tags.php
 *
 * @param string $str            <p>
 *                               The input string.
 *                               </p>
 * @param string $allowable_tags [optional] <p>
 *                               You can use the optional second parameter to specify tags which should
 *                               not be stripped.
 *                               </p>
 *                               <p>
 *                               HTML comments and PHP tags are also stripped. This is hardcoded and
 *                               can not be changed with allowable_tags.
 *                               </p>
 * @param bool   $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return string the stripped string
 */
public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
{
    if ($str === '') {
        return '';
    }

    // optional clean-up happens before the tags are removed
    if ($cleanUtf8 === true) {
        $str = self::clean($str);
    }

    // the native function is only given a second argument if one was passed in
    return $allowable_tags === null
        ? \strip_tags($str)
        : \strip_tags($str, $allowable_tags);
}
8551
|
|
|
|
8552
|
|
|
/**
 * Strip all whitespace characters. This includes tabs and newline
 * characters, as well as multibyte whitespace such as the thin space
 * and ideographic space.
 *
 * <p>If the replacement fails, an empty string is returned.</p>
 *
 * @param string $str
 *
 * @return string
 */
public static function strip_whitespace(string $str): string
{
    if ($str === '') {
        return '';
    }

    $stripped = \preg_replace('/[[:space:]]+/u', '', $str);

    // "preg_replace()" gives null on error, the cast turns that into ''
    return (string) $stripped;
}
8569
|
|
|
|
8570
|
|
|
/**
 * Finds position of first occurrence of a string within another, case insensitive.
 *
 * <p>Lookup order: "mb_stripos()" if mbstring is available, otherwise
 * "grapheme_stripos()" (UTF-8, non-negative offset, intl available), then the
 * native "stripos()" for pure ASCII input, and finally a case-folded
 * "UTF8::strpos()".</p>
 *
 * @see http://php.net/manual/en/function.mb-stripos.php
 *
 * @param string $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
 * @param string $needle    <p>The string to find in haystack.</p>
 * @param int    $offset    [optional] <p>The position in haystack to start searching.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|int
 *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
 *                   haystack string,<br> or <strong>false</strong> if needle is not found
 */
public static function stripos(
    string $haystack,
    string $needle,
    int $offset = 0,
    $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    if ($haystack === '' || $needle === '') {
        return false;
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $haystack = self::clean($haystack);
        $needle = self::clean($needle);
    }

    $isUtf8 = $encoding === 'UTF-8';

    //
    // preferred: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($isUtf8) {
            return \mb_stripos($haystack, $needle, $offset);
        }

        return \mb_stripos($haystack, $needle, $offset, self::normalize_encoding($encoding, 'UTF-8'));
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_stripos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $position = \grapheme_stripos($haystack, $needle, $offset);
        if ($position !== false) {
            return $position;
        }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack . $needle)) {
        return \stripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
}
8645
|
|
|
|
/**
 * Case-insensitive search: returns the part of the haystack starting at (and including)
 * the first occurrence of the needle, or the part before it.
 *
 * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
 * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
 * @param bool   $before_needle [optional] <p>
 *                              If <b>TRUE</b>, the part of the haystack before the first
 *                              occurrence of the needle is returned (excluding the needle).
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
 *                      (or if haystack / needle is an empty string)
 */
public static function stristr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    if ($haystack === '' || $needle === '') {
        return false;
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle. Compare strictly against '' here:
    // a loose "!$needle" check would also swallow the valid needle "0".
    if ($needle === '') {
        return $haystack;
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            return \mb_stristr($haystack, $needle, $before_needle);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
        if ($returnTmp !== false) {
            return $returnTmp;
        }
        // a "false" result falls through to the remaining fallbacks
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($needle . $haystack)) {
        return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // $match[1] captures (non-greedy) everything in front of the first occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
        return false;
    }

    if ($before_needle) {
        return $match[1];
    }

    // skip as many characters as were found in front of the needle
    return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
}
8729
|
|
|
|
/**
 * Get the string length in characters, not the byte-length!
 *
 * The first available backend is used, in this order: mbstring, plain byte count
 * (for "CP850" / "ASCII"), iconv, intl (UTF-8 only), byte count for pure ASCII input
 * and finally a regex based count.
 *
 * @see http://php.net/manual/en/function.mb-strlen.php
 *
 * @param string $str       <p>The string being checked for length.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|int
 *                   The number <strong>(int)</strong> of characters in the string $str having character
 *                   encoding $encoding. (One multi-byte character counted as +1).
 *                   <br>
 *                   Can return <strong>false</strong>, e.g. if no extension is available and the
 *                   regex fallback matches no character.
 */
public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
{
    if ($str === '') {
        return 0;
    }

    $isShortcutEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if (!$isShortcutEncoding) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
        // "mb_strlen" and "\iconv_strlen" returns wrong length,
        // if invalid characters are found in $str
        $str = self::clean($str);
    }

    // --- mbstring ---------------------------------------------------------

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strlen($str)
            : \mb_strlen($str, $encoding);
    }

    // --- binary || ascii only ---------------------------------------------

    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
        return \strlen($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // --- iconv ------------------------------------------------------------

    if (self::$SUPPORT['iconv'] === true) {
        $iconvLength = \iconv_strlen($str, $encoding);
        if ($iconvLength !== false) {
            return $iconvLength;
        }
    }

    // --- intl -------------------------------------------------------------

    // INFO: "grapheme_strlen()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
        $graphemeLength = \grapheme_strlen($str);
        if ($graphemeLength !== null) {
            return $graphemeLength;
        }
    }

    // --- ascii only -------------------------------------------------------

    if (self::is_ascii($str)) {
        return \strlen($str);
    }

    // --- vanilla php ------------------------------------------------------

    // every match of "." in unicode mode is exactly one character
    \preg_match_all('/./us', $str, $chars);
    $charCount = \count($chars[0]);

    return $charCount === 0 ? false : $charCount;
}
8844
|
|
|
|
/**
 * Get the length of a string in bytes.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return int the number of bytes, 0 for an empty string
 */
public static function strlen_in_byte(string $str): int
{
    if ($str === '') {
        return 0;
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, 'CP850')
        : \strlen($str);
}
8865
|
|
|
|
/**
 * Case-insensitive "natural order" comparison of two strings.
 *
 * INFO: natural order version of UTF8::strcasecmp(); both strings are passed through
 *       UTF8::strtocasefold() and then handed to UTF8::strnatcmp()
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
 *             <strong>0</strong> if they are equal
 */
public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
{
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded1, $folded2);
}
8887
|
|
|
|
/**
 * "Natural order" comparison of two strings.
 *
 * INFO: natural order version of UTF8::strcmp(); both strings are passed through
 *       UTF8::strtonatfold() before the native comparison
 *
 * @see http://php.net/manual/en/function.strnatcmp.php
 *
 * @param string $str1 <p>The first string.</p>
 * @param string $str2 <p>The second string.</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strnatcmp(string $str1, string $str2): int
{
    // identical strings need no folding at all
    if ($str1 === $str2) {
        return 0;
    }

    $left = (string) self::strtonatfold($str1);
    $right = (string) self::strtonatfold($str2);

    return \strnatcmp($left, $right);
}
8914
|
|
|
|
/**
 * Case-insensitive string comparison of the first n characters.
 *
 * Both strings are case-folded via UTF8::strtocasefold() and then compared with
 * UTF8::strncmp().
 *
 * @see http://php.net/manual/en/function.strncasecmp.php
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param int    $len      <p>The length of strings to be used in the comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
 *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strncasecmp(
    string $str1,
    string $str2,
    int $len,
    string $encoding = 'UTF-8'
): int {
    return self::strncmp(
        self::strtocasefold($str1, true, false, $encoding, null, false),
        self::strtocasefold($str2, true, false, $encoding, null, false),
        $len,
        // Forward the encoding, otherwise strncmp() always cuts the prefixes as UTF-8.
        // NOTE(review): assumes strtocasefold() keeps the strings in $encoding - confirm.
        $encoding
    );
}
8942
|
|
|
|
/**
 * String comparison of the first n characters.
 *
 * Both strings are cut down to their first $len characters and the two prefixes are
 * then compared via UTF8::strcmp().
 *
 * @see http://php.net/manual/en/function.strncmp.php
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param int    $len      <p>Number of characters to use in the comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
 *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strncmp(
    string $str1,
    string $str2,
    int $len,
    string $encoding = 'UTF-8'
): int {
    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $isUtf8 = ($encoding === 'UTF-8');

    // UTF-8 goes straight to "mb_substr()", everything else through UTF8::substr()
    $prefix1 = $isUtf8
        ? (string) \mb_substr($str1, 0, $len)
        : (string) self::substr($str1, 0, $len, $encoding);
    $prefix2 = $isUtf8
        ? (string) \mb_substr($str2, 0, $len)
        : (string) self::substr($str2, 0, $len, $encoding);

    return self::strcmp($prefix1, $prefix2);
}
8978
|
|
|
|
/**
 * Search a string for any of a set of characters.
 *
 * @see http://php.net/manual/en/function.strpbrk.php
 *
 * @param string $haystack  <p>The string where char_list is looked for.</p>
 * @param string $char_list <p>This parameter is case sensitive.</p>
 *
 * @return false|string string starting from the character found, or false if it is not found
 *                      (or if one of the arguments is an empty string)
 */
public static function strpbrk(string $haystack, string $char_list)
{
    if ($haystack === '' || $char_list === '') {
        return false;
    }

    // character class built from all chars of $char_list
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $found)) {
        return false;
    }

    // byte position of the matched character inside the haystack
    $bytePos = (int) \strpos($haystack, $found[0]);

    return \substr($haystack, $bytePos);
}
9001
|
|
|
|
/**
 * Find position of first occurrence of string in a string.
 *
 * The first available backend is used, in this order: mbstring, native "strpos()"
 * (for "CP850" / "ASCII"), intl (UTF-8 only), iconv, native "strpos()" for pure ASCII
 * input and finally a pure PHP implementation.
 *
 * @see http://php.net/manual/en/function.mb-strpos.php
 *
 * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
 * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
 * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
 * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|int
 *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the
 *                   haystack string.<br> If needle is not found (or haystack / needle is empty) it
 *                   returns false.
 */
public static function strpos(
    string $haystack,
    $needle,
    int $offset = 0,
    $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    if ($haystack === '') {
        return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int) $needle === $needle) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    if ($needle === '') {
        return false;
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            return \mb_strpos($haystack, $needle, $offset);
        }

        return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback for binary || ascii only
    //
    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
        return \strpos($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
        if ($returnTmp !== false) {
            return $returnTmp;
        }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
        // ignore invalid negative offset to keep compatibility
        // with php < 5.5.35, < 5.6.21, < 7.0.6
        $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
        if ($returnTmp !== false) {
            return $returnTmp;
        }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack . $needle)) {
        return \strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // A negative offset counts from the end of the haystack. Resolve it to an absolute
    // character position first, so that the result is relative to the start of the
    // original haystack and not to the tail that is searched below.
    if ($offset < 0) {
        $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
    }

    $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    if ($haystackTmp === false) {
        $haystackTmp = '';
    }
    $haystack = (string) $haystackTmp;

    // byte position inside the tail ...
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
        return false;
    }

    // ... converted into a character position (the empty prefix for $pos === 0 has length 0)
    return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
}
9149
|
|
|
|
/**
 * Find the byte position of the first occurrence of a string in a string.
 *
 * @param string $haystack <p>
 *                         The string being checked.
 *                         </p>
 * @param string $needle   <p>
 *                         The string to find in haystack.
 *                         </p>
 * @param int    $offset   [optional] <p>
 *                         The search offset. If it is not specified, 0 is used.
 *                         </p>
 *
 * @return false|int The numeric position of the first occurrence of needle in the
 *                   haystack string. If needle is not found (or one of the strings
 *                   is empty), it returns false.
 */
public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    if ($haystack === '' || $needle === '') {
        return false;
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT encoding ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strpos($haystack, $needle, $offset, 'CP850')
        : \strpos($haystack, $needle, $offset);
}
9179
|
|
|
|
/**
 * Finds the last occurrence of a character in a string within another.
 *
 * INFO: without mbstring only the first character of the needle is searched for.
 *
 * @see http://php.net/manual/en/function.mb-strrchr.php
 *
 * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
 * @param string $needle        <p>The string to find in haystack</p>
 * @param bool   $before_needle [optional] <p>
 *                              Determines which portion of haystack this function returns.
 *                              If set to true, it returns all of haystack
 *                              from the beginning to the last occurrence of needle.
 *                              If set to false, it returns all of haystack
 *                              from the last occurrence of needle to the end.
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|string the portion of haystack or false if needle is not found
 */
public static function strrchr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    if ($haystack === '' || $needle === '') {
        return false;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // --- mbstring ---------------------------------------------------------

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strrchr($haystack, $needle, $before_needle)
            : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    // --- binary || ascii only ---------------------------------------------

    if (
        $before_needle === false
        &&
        \in_array($encoding, ['CP850', 'ASCII'], true)
    ) {
        return \strrchr($haystack, $needle);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // --- iconv / vanilla php ----------------------------------------------

    // only the first character of the needle is used from here on
    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
        return false;
    }
    $needle = (string) $firstChar;

    // iconv if available, otherwise our own strrpos()
    $lastPos = self::$SUPPORT['iconv'] === true
        ? \iconv_strrpos($haystack, $needle, $encoding)
        : self::strrpos($haystack, $needle, 0, $encoding);
    if ($lastPos === false) {
        return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPos, $encoding)
        : self::substr($haystack, $lastPos, null, $encoding);
}
9302
|
|
|
|
/**
 * Reverses characters order in the string.
 *
 * The string is passed through UTF8::emoji_encode() before and UTF8::emoji_decode()
 * after the reversal.
 *
 * @param string $str      <p>The input string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string the string with characters in the reverse sequence
 */
public static function strrev(string $str, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    $str = self::emoji_encode($str, true);
    $result = '';

    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        for ($index = (int) self::strlen($str, $encoding) - 1; $index >= 0; --$index) {
            $char = self::substr($str, $index, 1, $encoding);
            if ($char !== false) {
                $result .= $char;
            }
        }
    } elseif (self::$SUPPORT['intl'] === true) {
        // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
        for ($index = (int) \grapheme_strlen($str) - 1; $index >= 0; --$index) {
            $char = \grapheme_substr($str, $index, 1);
            if ($char !== false) {
                $result .= $char;
            }
        }
    } else {
        for ($index = (int) \mb_strlen($str) - 1; $index >= 0; --$index) {
            $char = \mb_substr($str, $index, 1);
            if ($char !== false) {
                $result .= $char;
            }
        }
    }

    return self::emoji_decode($result, true);
}
9355
|
|
|
|
/**
 * Finds the last occurrence of a character in a string within another, case insensitive.
 *
 * INFO: without mbstring only the first character of the needle is searched for.
 *
 * @see http://php.net/manual/en/function.mb-strrichr.php
 *
 * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
 * @param string $needle        <p>The string to find in haystack.</p>
 * @param bool   $before_needle [optional] <p>
 *                              Determines which portion of haystack this function returns.
 *                              If set to true, it returns all of haystack
 *                              from the beginning to the last occurrence of needle.
 *                              If set to false, it returns all of haystack
 *                              from the last occurrence of needle to the end.
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|string the portion of haystack or<br>false if needle is not found
 */
public static function strrichr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    if ($haystack === '' || $needle === '') {
        return false;
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // --- mbstring ---------------------------------------------------------

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strrichr($haystack, $needle, $before_needle)
            : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    // --- vanilla php ------------------------------------------------------

    // only the first character of the needle is used from here on
    $firstChar = self::substr($needle, 0, 1, $encoding);
    if ($firstChar === false) {
        return false;
    }
    $needle = (string) $firstChar;

    $lastPos = self::strripos($haystack, $needle, 0, $encoding);
    if ($lastPos === false) {
        return false;
    }

    return $before_needle
        ? self::substr($haystack, 0, $lastPos, $encoding)
        : self::substr($haystack, $lastPos, null, $encoding);
}
9431
|
|
|
|
/**
 * Find position of last occurrence of a case-insensitive string.
 *
 * The first available backend is used, in this order: mbstring, native "strripos()"
 * (for "CP850" / "ASCII"), intl (UTF-8 only), native "strripos()" for pure ASCII input
 * and finally case-folding both strings and calling UTF8::strrpos().
 *
 * @param string     $haystack  <p>The string to look in.</p>
 * @param int|string $needle    <p>The string to look for.<br>Or a (non-negative) code point as int.</p>
 * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
 * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|int
 *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the
 *                   haystack string.<br>If needle is not found (or haystack / needle is empty), it
 *                   returns false.
 */
public static function strripos(
    string $haystack,
    $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    if ($haystack === '') {
        return false;
    }

    // iconv and mbstring do not support integer $needle
    $needleIsCodePoint = ((int) $needle === $needle && $needle >= 0);
    if ($needleIsCodePoint) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    if ($needle === '') {
        return false;
    }

    if ($cleanUtf8 === true) {
        // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // --- mbstring ---------------------------------------------------------

    if (self::$SUPPORT['mbstring'] === true) {
        return $encoding === 'UTF-8'
            ? \mb_strripos($haystack, $needle, $offset)
            : \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // --- binary || ascii only ---------------------------------------------

    if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
        return \strripos($haystack, $needle, $offset);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // --- intl -------------------------------------------------------------

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strripos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
        if ($graphemePos !== false) {
            return $graphemePos;
        }
    }

    // --- ascii only -------------------------------------------------------

    if (self::is_ascii($haystack . $needle)) {
        return \strripos($haystack, $needle, $offset);
    }

    // --- vanilla php ------------------------------------------------------

    // fold the case of both strings, then do a case-sensitive search
    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

    return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
}
9542
|
|
|
|
9543
|
|
|
/**
 * Case-insensitive search for the last occurrence of a string within another, working on bytes.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The string to search for.</p>
 * @param int    $offset   [optional] <p>The position in haystack to start searching.</p>
 *
 * @return false|int the numeric position of the last occurrence of needle in the haystack,
 *                   or false if needle is not found (an empty haystack or needle is never found)
 */
public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // the "mb_" functions are available whenever function overloading is active,
    // so use them with an 8-bit encoding ('CP850') in that case
    $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

    return $overloadActive
        ? \mb_strripos($haystack, $needle, $offset, 'CP850')
        : \strripos($haystack, $needle, $offset);
}
9574
|
|
|
|
9575
|
|
|
/**
 * Find the position of the last occurrence of a string in a string.
 *
 * The first available backend wins: mbstring, plain byte functions (for "CP850" / "ASCII"),
 * intl (UTF-8 with a non-negative offset only), the ASCII-only shortcut and finally a
 * vanilla PHP implementation.
 *
 * @see http://php.net/manual/en/function.mb-strrpos.php
 *
 * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
 * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
 * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
 *                              into the string. Negative values will stop searching at an arbitrary point prior to
 *                              the end of the string.
 *                              </p>
 * @param string     $encoding  [optional] <p>Set the charset.</p>
 * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|int
 *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
 *                   string.<br>If needle is not found, it returns false.
 */
public static function strrpos(
    string $haystack,
    $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    if ($haystack === '') {
        return false;
    }

    // iconv and mbstring do not support integer $needle, so a non-negative
    // integer is turned into its character first
    if ((int) $needle === $needle && $needle >= 0) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    if ($needle === '') {
        return false;
    }

    if ($cleanUtf8 === true) {
        // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }
    $isUtf8 = $encoding === 'UTF-8';

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $isUtf8
            ? \mb_strrpos($haystack, $needle, $offset)
            : \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return \strrpos($haystack, $needle, $offset);
    }

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    // grapheme_strrpos() can't handle negative offsets or other encodings
    if ($isUtf8 && $offset >= 0 && self::$SUPPORT['intl'] === true) {
        $graphemePos = \grapheme_strrpos($haystack, $needle, $offset);
        if ($graphemePos !== false) {
            return $graphemePos;
        }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack . $needle)) {
        return \strrpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    if ($offset !== 0) {
        // a positive offset drops the leading characters (and is added back to the result),
        // a negative offset drops the trailing characters (nothing is added back)
        if ($offset > 0) {
            $slice = self::substr($haystack, $offset);
        } else {
            $slice = self::substr($haystack, 0, $offset);
            $offset = 0;
        }

        $haystack = $slice === false ? '' : (string) $slice;
    }

    $bytePos = \strrpos($haystack, $needle);
    if ($bytePos === false) {
        return false;
    }

    $prefix = \substr($haystack, 0, $bytePos);
    if ($prefix === false) {
        return false;
    }

    // convert the byte position into a character position by measuring the prefix
    return $offset + (int) self::strlen($prefix);
}
9712
|
|
|
|
9713
|
|
|
/**
 * Find the position of the last occurrence of a string in a string, working on bytes.
 *
 * @param string $haystack <p>The string being checked, for the last occurrence of needle.</p>
 * @param string $needle   <p>The string to find in haystack.</p>
 * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
 *                         the string. Negative values will stop searching at an arbitrary point
 *                         prior to the end of the string.
 *
 * @return false|int The numeric position of the last occurrence of needle in the
 *                   haystack string. If needle is not found (or haystack / needle is empty), it returns false.
 */
public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // the "mb_" functions are available whenever function overloading is active,
    // so use them with an 8-bit encoding ('CP850') in that case
    $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

    return $overloadActive
        ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
        : \strrpos($haystack, $needle, $offset);
}
9743
|
|
|
|
9744
|
|
|
/**
 * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
 * mask.
 *
 * @param string $str      <p>The input string.</p>
 * @param string $mask     <p>The mask of chars</p>
 * @param int    $offset   [optional] <p>Start of the part of the string that is examined.</p>
 * @param int    $length   [optional] <p>Length of the part of the string that is examined.</p>
 * @param string $encoding [optional] <p>Set the charset.</p>
 *
 * @return false|int the length of the matching initial segment, 0 if nothing matches or
 *                   if the (sliced) string or the mask is empty
 */
public static function strspn(
    string $str,
    string $mask,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8'
) {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // cut the string down to the requested window first
    $needsSlice = $offset || $length !== null;
    if ($needsSlice) {
        if ($encoding !== 'UTF-8') {
            $str = (string) self::substr($str, $offset, $length, $encoding);
        } elseif ($length === null) {
            $str = (string) \mb_substr($str, $offset);
        } else {
            $str = (string) \mb_substr($str, $offset, $length);
        }
    }

    if ($mask === '' || $str === '') {
        return 0;
    }

    $found = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
        return 0;
    }

    return (int) self::strlen($found[0], $encoding);
}
9787
|
|
|
|
9788
|
|
|
/**
 * Returns part of haystack string from the first occurrence of needle to the end of haystack.
 *
 * The first available backend wins: mbstring, plain byte functions (for "CP850" / "ASCII"),
 * intl (UTF-8 only), the ASCII-only shortcut and finally a regex based implementation.
 *
 * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
 * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
 * @param bool   $before_needle [optional] <p>
 *                              If <b>TRUE</b>, strstr() returns the part of the
 *                              haystack before the first occurrence of the needle (excluding the needle).
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|string
 *                      A sub-string,<br>or <strong>false</strong> if needle is not found
 */
public static function strstr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    $cleanUtf8 = false
) {
    if ($needle === '' || $haystack === '') {
        return false;
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }
    $isUtf8 = $encoding === 'UTF-8';

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $isUtf8
            ? \mb_strstr($haystack, $needle, $before_needle)
            : \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return \strstr($haystack, $needle, $before_needle);
    }

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    // INFO: "grapheme_strstr()" can't handle other encodings
    if ($isUtf8 && self::$SUPPORT['intl'] === true) {
        $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
        if ($graphemeResult !== false) {
            return $graphemeResult;
        }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack . $needle)) {
        return \strstr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php
    //

    // capture (non-greedy) everything in front of the first occurrence of the needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
        return false;
    }

    $beforeNeedle = $found[1];
    if ($before_needle) {
        return $beforeNeedle;
    }

    return self::substr($haystack, (int) self::strlen($beforeNeedle));
}
9896
|
|
|
|
9897
|
|
|
/**
 * Finds the first occurrence of a string within another, working on bytes.
 *
 * @param string $haystack      <p>The string from which to get the first occurrence of needle.</p>
 * @param string $needle        <p>The string to find in haystack.</p>
 * @param bool   $before_needle [optional] <p>
 *                              Determines which portion of haystack this function returns.
 *                              If set to true, it returns all of haystack
 *                              from the beginning to the first occurrence of needle.
 *                              If set to false, it returns all of haystack
 *                              from the first occurrence of needle to the end.
 *                              </p>
 *
 * @return false|string the portion of haystack,
 *                      or false if needle is not found (or haystack / needle is empty)
 */
public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // the "mb_" functions are available whenever function overloading is active,
    // so use them with an 8-bit encoding ('CP850') in that case
    $overloadActive = self::$SUPPORT['mbstring_func_overload'] === true;

    return $overloadActive
        ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
        : \strstr($haystack, $needle, $before_needle);
}
9932
|
|
|
|
9933
|
|
|
/**
 * Unicode transformation for case-less matching.
 *
 * @see http://unicode.org/reports/tr21/tr21-5.html
 *
 * @param string      $str       <p>The input string.</p>
 * @param bool        $full      [optional] <p>
 *                               <b>true</b>, replace full case folding chars (default)<br>
 *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
 *                               </p>
 * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string      $encoding  [optional] <p>Set the charset.</p>
 * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
 *                               is for some languages better ...</p>
 *
 * @return string
 */
public static function strtocasefold(
    string $str,
    bool $full = true,
    bool $cleanUtf8 = false,
    string $encoding = 'UTF-8',
    string $lang = null,
    $lower = true
): string {
    if ($str === '') {
        return '';
    }

    if ($cleanUtf8 === true) {
        $str = self::clean($str);
    }

    $str = self::fixStrCaseHelper($str, $lower, $full);

    $toLower = $lower === true;

    // no language and plain UTF-8: the "mb_" functions can be called directly
    if ($encoding === 'UTF-8' && $lang === null) {
        return $toLower ? \mb_strtolower($str) : \mb_strtoupper($str);
    }

    return $toLower
        ? self::strtolower($str, $encoding, false, $lang)
        : self::strtoupper($str, $encoding, false, $lang);
}
9985
|
|
|
|
9986
|
|
|
/**
 * Make a string lowercase.
 *
 * @see http://php.net/manual/en/function.mb-strtolower.php
 *
 * @param string      $str                   <p>The string being lowercased.</p>
 * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string
 *                <p>String with all alphabetic characters converted to lowercase.</p>
 */
public static function strtolower(
    $str,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    $str = (string) $str;
    if ($str === '') {
        return '';
    }

    if ($cleanUtf8 === true) {
        $str = self::clean($str);
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
        $str = self::fixStrCaseHelper($str, true);
    }

    // no language and plain UTF-8: nothing else to prepare
    if ($encoding === 'UTF-8' && $lang === null) {
        return \mb_strtolower($str);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ($lang !== null) {
        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
        } else {
            // the list of transliterator ids is loaded on first use
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            $transliteratorId = $lang . '-Lower';
            if (!\in_array($transliteratorId, self::$INTL_TRANSLITERATOR_LIST, true)) {
                \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

                // use the language independent transliterator instead
                $transliteratorId = 'Any-Lower';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            return (string) \transliterator_transliterate($transliteratorId, $str);
        }
    }

    // always fallback via symfony polyfill
    return \mb_strtolower($str, $encoding);
}
10055
|
|
|
|
10056
|
|
|
/**
 * Make a string uppercase.
 *
 * @see http://php.net/manual/en/function.mb-strtoupper.php
 *
 * @param string      $str                   <p>The string being uppercased.</p>
 * @param string      $encoding              [optional] <p>Set the charset.</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string
 *                <p>String with all alphabetic characters converted to uppercase.</p>
 */
public static function strtoupper(
    $str,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    // init
    $str = (string) $str;

    if ($str === '') {
        return '';
    }

    if ($cleanUtf8 === true) {
        $str = self::clean($str);
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
        $str = self::fixStrCaseHelper($str, false);
    }

    // no language and plain UTF-8: nothing else to prepare
    if ($lang === null && $encoding === 'UTF-8') {
        return \mb_strtoupper($str);
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if ($lang !== null) {
        if (self::$SUPPORT['intl'] === true) {
            // the list of transliterator ids is loaded on first use
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            $langCode = $lang . '-Upper';
            if (!\in_array($langCode, self::$INTL_TRANSLITERATOR_LIST, true)) {
                // intl is available in this branch, only the language specific transliterator is unknown
                \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                // use the language independent transliterator instead
                $langCode = 'Any-Upper';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            /** @noinspection UnnecessaryCastingInspection */
            return (string) \transliterator_transliterate($langCode, $str);
        }

        // the warning has to name this method (it used to report "strtolower()")
        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
}
10125
|
|
|
|
10126
|
|
|
/**
 * Translate characters or replace sub-strings.
 *
 * If "$to" is not an empty string, "$from" and "$to" are split via UTF8::str_split(), cut to the
 * same number of elements and applied as replacement pairs. If "$to" is an empty string, a string
 * "$from" is removed from "$str" and any other "$from" is passed to \strtr() as replacement pairs.
 *
 * @see http://php.net/manual/en/function.strtr.php
 *
 * @param string          $str  <p>The string being translated.</p>
 * @param string|string[] $from <p>The string replacing from.</p>
 * @param string|string[] $to   [optional] <p>The string being translated to.</p>
 *
 * @throws \InvalidArgumentException if the split "$from" and "$to" cannot be combined into pairs
 *
 * @return string
 *                This function returns a copy of str, translating all occurrences of each character in from to the
 *                corresponding character in to
 */
public static function strtr(string $str, $from, $to = ''): string
{
    if ($str === '') {
        return '';
    }

    if ($from === $to) {
        return $str;
    }

    if ($to !== '') {
        $fromChars = self::str_split($from);
        $toChars = self::str_split($to);
        $countFrom = \count($fromChars);
        $countTo = \count($toChars);

        // only as many pairs as both sides can provide
        if ($countFrom > $countTo) {
            $fromChars = \array_slice($fromChars, 0, $countTo);
        } elseif ($countFrom < $countTo) {
            $toChars = \array_slice($toChars, 0, $countFrom);
        }

        // keep the inputs in their own variables, so that the error message
        // can still show them when combining fails
        $pairs = \array_combine($fromChars, $toChars);
        if ($pairs === false) {
            throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($fromChars, true) . ' | to: ' . \print_r($toChars, true) . ')');
        }

        $from = $pairs;
    }

    if (\is_string($from)) {
        return \str_replace($from, '', $str);
    }

    return \strtr($str, $from);
}
10173
|
|
|
|
10174
|
|
|
/**
 * Return the width of a string.
 *
 * Without mbstring the width is calculated manually: every character matched by the
 * "wide" character class below counts twice, every other character counts once.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return int
 */
public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
{
    if ($str === '') {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }
    $isUtf8 = $encoding === 'UTF-8';

    if ($cleanUtf8 === true) {
        // iconv and mbstring are not tolerant to invalid encoding
        // further, their behaviour is inconsistent with that of PHP's substr
        $str = self::clean($str);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return $isUtf8 ? \mb_strwidth($str) : \mb_strwidth($str, $encoding);
    }

    //
    // fallback via vanilla php
    //

    if (!$isUtf8) {
        $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // strip the wide characters and let preg_replace() count them
    $wideCount = 0;
    $narrowOnly = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

    return 2 * $wideCount + (int) self::strlen($narrowOnly, 'UTF-8');
}
10224
|
|
|
|
10225
|
|
|
/**
 * Get part of a string.
 *
 * The first available backend wins: mbstring, plain byte functions (for "CP850" / "ASCII"),
 * intl (UTF-8 with a non-negative offset only), iconv (non-negative length only),
 * the ASCII-only shortcut and finally a vanilla PHP implementation.
 *
 * @see http://php.net/manual/en/function.mb-substr.php
 *
 * @param string $str       <p>The string being checked.</p>
 * @param int    $offset    <p>The first position used in str.</p>
 * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|string
 *                      The portion of <i>str</i> specified by the <i>offset</i> and
 *                      <i>length</i> parameters.</p><p>In the fallback implementation an empty string
 *                      is returned if <i>offset</i> lies behind the end of <i>str</i>, and
 *                      <b>FALSE</b> is returned if the length of <i>str</i> cannot be determined.
 */
public static function substr(
    string $str,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    // empty string
    if ($str === '' || $length === 0) {
        return '';
    }

    if ($cleanUtf8 === true) {
        // iconv and mbstring are not tolerant to invalid encoding
        // further, their behaviour is inconsistent with that of PHP's substr
        $str = self::clean($str);
    }

    // whole string
    if (!$offset && $length === null) {
        return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        // Hand the (normalized) encoding to mbstring. Calling self::substr() here again
        // with the very same arguments would end up in this branch forever.
        // INFO: "null" as length means "until the end of the string" for mb_substr()
        return \mb_substr($str, $offset, $length, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
        if ($length === null) {
            return \substr($str, $offset);
        }

        return \substr($str, $offset, $length);
    }

    // otherwise we need the string-length
    $str_length = 0;
    if ($offset || $length === null) {
        $str_length = self::strlen($str, $encoding);
    }

    // e.g.: invalid chars + mbstring not installed
    if ($str_length === false) {
        return false;
    }

    // empty string
    if ($offset === $str_length && !$length) {
        return '';
    }

    // impossible
    if ($offset && $offset > $str_length) {
        return '';
    }

    if ($length === null) {
        $length = (int) $str_length;
    } else {
        $length = (int) $length;
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_substr() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $returnTmp = \grapheme_substr($str, $offset, $length);
        if ($returnTmp !== false) {
            return $returnTmp;
        }
    }

    //
    // fallback via iconv
    //

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
        $returnTmp = \iconv_substr($str, $offset, $length);
        if ($returnTmp !== false) {
            return $returnTmp;
        }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($str)) {
        return \substr($str, $offset, $length);
    }

    //
    // fallback via vanilla php
    //

    // split to array, and remove invalid characters
    $array = self::str_split($str);

    // extract relevant part, and join to make string again
    return \implode('', \array_slice($array, $offset, $length));
}
10385
|
|
|
|
10386
|
|
|
/**
 * Binary safe comparison of two strings from an offset, up to length characters.
 *
 * When an offset or a length is given, $str1 is first reduced to the requested
 * window and $str2 is then truncated to the character length of that window,
 * so only the overlapping part is compared.
 *
 * @param string   $str1               <p>The main string being compared.</p>
 * @param string   $str2               <p>The secondary string being compared.</p>
 * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
 *                                     counting from the end of the string.</p>
 * @param int|null $length             [optional] <p>The length of the comparison. If null, $str1 is compared from
 *                                     $offset up to its end.</p>
 * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
 *                                     insensitive.</p>
 * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
 *             <strong>0</strong> if they are equal
 */
public static function substr_compare(
    string $str1,
    string $str2,
    int $offset = 0,
    int $length = null,
    bool $case_insensitivity = false,
    string $encoding = 'UTF-8'
): int {
    if (
        $offset !== 0
        ||
        $length !== null
    ) {
        if ($encoding === 'UTF-8') {
            if ($length === null) {
                $str1 = (string) \mb_substr($str1, $offset);
            } else {
                $str1 = (string) \mb_substr($str1, $offset, $length);
            }
            $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str1 = (string) self::substr($str1, $offset, $length, $encoding);

            // Measure the window in the same encoding that is used to cut $str2,
            // otherwise the two strings are truncated in different units.
            $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
        }
    }

    if ($case_insensitivity === true) {
        return self::strcasecmp($str1, $str2, $encoding);
    }

    return self::strcmp($str1, $str2);
}
10439
|
|
|
|
10440
|
|
|
/**
 * Count the number of substring occurrences.
 *
 * @see http://php.net/manual/en/function.substr-count.php
 *
 * @param string   $haystack  <p>The string to search in.</p>
 * @param string   $needle    <p>The substring to search for.</p>
 * @param int      $offset    [optional] <p>The offset where to start counting. A negative value counts from
 *                            the end of the string.</p>
 * @param int|null $length    [optional] <p>
 *                            The maximum length after the specified offset to search for the
 *                            substring. A negative value stops that many characters before the end
 *                            of the string. 0 always yields a count of 0.
 *                            </p>
 * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|int the number of occurrences, or false if $haystack or $needle is empty
 *                   (or the length of $haystack cannot be determined)
 */
public static function substr_count(
    string $haystack,
    string $needle,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    if ($haystack === '' || $needle === '') {
        return false;
    }

    if ($length === 0) {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // "$length === 0" was handled above, so any non-null length (positive or
    // negative) describes a real window that has to be cut out of the haystack.
    if ($offset !== 0 || $length !== null) {
        if ($length === null) {
            $lengthTmp = self::strlen($haystack, $encoding);
            if ($lengthTmp === false) {
                return false;
            }
            $length = (int) $lengthTmp;
        }

        if ($encoding === 'UTF-8') {
            $haystack = (string) \mb_substr($haystack, $offset, $length);
        } else {
            $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
        }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            return \mb_substr_count($haystack, $needle);
        }

        return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via PCRE: count the literal (quoted) needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

    return \count($matches);
}
10521
|
|
|
|
10522
|
|
|
/**
 * Count the number of substring occurrences, working on bytes.
 *
 * Without "mbstring.func_overload" the call is handed straight to the native
 * "substr_count()". With the overload active, the haystack is cut to the
 * requested window first and then counted via "mb_substr_count()" using the
 * 8-bit "CP850" encoding.
 *
 * @param string   $haystack <p>The string being checked.</p>
 * @param string   $needle   <p>The string being found.</p>
 * @param int      $offset   [optional] <p>The offset where to start counting</p>
 * @param int|null $length   [optional] <p>
 *                           The maximum length after the specified offset to search for the
 *                           substring. It outputs a warning if the offset plus the length is
 *                           greater than the haystack length.
 *                           </p>
 *
 * @return false|int the number of times the needle substring occurs in the haystack string;
 *                   0 if $haystack or $needle is empty
 */
public static function substr_count_in_byte(
    string $haystack,
    string $needle,
    int $offset = 0,
    int $length = null
) {
    if ($needle === '' || $haystack === '') {
        return 0;
    }

    $isOverloaded = self::$SUPPORT['mbstring_func_overload'] === true;

    if (!$isOverloaded) {
        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }

    if ($offset || $length !== null) {
        if ($length === null) {
            $measuredLength = self::strlen($haystack);
            if ($measuredLength === false) {
                return false;
            }
            $length = (int) $measuredLength;
        }

        $windowIsEmptyOrNegative = $length !== 0
                                   && $offset !== 0
                                   && ($length + $offset) <= 0;

        // output from "substr_count()" have changed in PHP 7.1
        if ($windowIsEmptyOrNegative && Bootup::is_php('7.1') === false) {
            return false;
        }

        $window = \substr($haystack, $offset, $length);
        $haystack = $window === false ? '' : (string) $window;
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
}
10599
|
|
|
|
10600
|
|
|
/**
 * Returns the number of occurrences of $substring in the given string.
 * The comparison is case-sensitive by default; pass false for $caseSensitive
 * to ignore case.
 *
 * @param string $str           <p>The input string.</p>
 * @param string $substring     <p>The substring to search for.</p>
 * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int 0 if $str or $substring is empty, the number of occurrences otherwise
 */
public static function substr_count_simple(
    string $str,
    string $substring,
    bool $caseSensitive = true,
    string $encoding = 'UTF-8'
): int {
    if ($substring === '' || $str === '') {
        return 0;
    }

    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$caseSensitive) {
            // fold both operands the same way before counting
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }

    if (!$caseSensitive) {
        // UTF-8 fast path: compare the upper-cased variants
        $str = \mb_strtoupper($str);
        $substring = \mb_strtoupper($substring);
    }

    return (int) \mb_substr_count($str, $substring);
}
10646
|
|
|
|
10647
|
|
|
/**
 * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return string $haystack without the leading $needle, or $haystack unchanged
 *                if it is empty, $needle is empty, or the prefix does not match
 */
public static function substr_ileft(string $haystack, string $needle): string
{
    // nothing to strip from, or nothing to strip
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
        return $haystack;
    }

    $prefixLength = (int) self::strlen($needle);

    return (string) \mb_substr($haystack, $prefixLength);
}
10671
|
|
|
|
10672
|
|
|
/**
 * Get part of a string, processed in bytes.
 *
 * @param string   $str    <p>The string being checked.</p>
 * @param int      $offset <p>The first position used in str.</p>
 * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
 *
 * @return false|string
 *                      The portion of <i>str</i> specified by the <i>offset</i> and
 *                      <i>length</i> parameters. An empty string is returned if <i>str</i> is empty
 *                      or <i>length</i> is 0; otherwise the result of the underlying
 *                      "substr()" / "mb_substr()" call is returned as-is.
 */
public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
{
    // empty result
    if ($length === 0 || $str === '') {
        return '';
    }

    // whole string requested
    if ($length === null && $offset === 0) {
        return $str;
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
        // a missing length means "up to the end", expressed as a very large length
        $byteLength = $length ?? 2147483647;

        return \substr($str, $offset, $byteLength);
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
}
10703
|
|
|
|
10704
|
|
|
/**
 * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return string $haystack without the trailing $needle, or $haystack unchanged
 *                if it is empty, $needle is empty, or the suffix does not match
 */
public static function substr_iright(string $haystack, string $needle): string
{
    // nothing to strip from, or nothing to strip
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
        return $haystack;
    }

    // number of leading characters that survive the cut
    $keepLength = (int) self::strlen($haystack) - (int) self::strlen($needle);

    return (string) \mb_substr($haystack, 0, $keepLength);
}
10728
|
|
|
|
10729
|
|
|
/**
 * Removes a prefix ($needle) from the start of the string ($haystack).
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return string $haystack without the leading $needle, or $haystack unchanged
 *                if it is empty, $needle is empty, or the prefix does not match
 */
public static function substr_left(string $haystack, string $needle): string
{
    // nothing to strip from, or nothing to strip
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
        return $haystack;
    }

    $prefixLength = (int) self::strlen($needle);

    return (string) \mb_substr($haystack, $prefixLength);
}
10753
|
|
|
|
10754
|
|
|
/**
 * Replace text within a portion of a string.
 *
 * source: https://gist.github.com/stemar/8287074
 *
 * If $str is an array, every element is processed on its own (with the same
 * $encoding) and an array of results is returned; $replacement, $offset and
 * $length may then be arrays as well and are matched up by position.
 *
 * NOTE(review): for array input a null $length is expanded to 0 for every
 * element (pure insertion), which differs from the single-string case where
 * null means "up to the end of the string" - confirm this is intended.
 *
 * @param string|string[] $str         <p>The input string or an array of strings.</p>
 * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
 * @param int|int[]       $offset      <p>
 *                                     If start is positive, the replacing will begin at the start'th offset
 *                                     into string.
 *                                     <br><br>
 *                                     If start is negative, the replacing will begin at the start'th character
 *                                     from the end of string.
 *                                     </p>
 * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
 *                                     portion of string which is to be replaced. If it is negative, it
 *                                     represents the number of characters from the end of string at which to
 *                                     stop replacing. If it is not given, then it will default to strlen(
 *                                     string ); i.e. end the replacing at the end of string. Of course, if
 *                                     length is zero then this function will have the effect of inserting
 *                                     replacement into string at the given start offset.</p>
 * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @throws \InvalidArgumentException if $offset or $length is an array while $str is not
 *
 * @return string|string[] The result string is returned. If string is an array then array is returned.
 */
public static function substr_replace(
    $str,
    $replacement,
    $offset,
    $length = null,
    string $encoding = 'UTF-8'
) {
    if (\is_array($str) === true) {
        $num = \count($str);

        // the replacement: one entry per input string
        if (\is_array($replacement) === true) {
            $replacement = \array_slice($replacement, 0, $num);
        } else {
            $replacement = \array_pad([$replacement], $num, $replacement);
        }

        // the offset: non-integer entries fall back to 0
        if (\is_array($offset) === true) {
            $offset = \array_slice($offset, 0, $num);
            foreach ($offset as &$valueTmp) {
                $valueTmp = \is_int($valueTmp) ? $valueTmp : 0;
            }
            unset($valueTmp);
        } else {
            $offset = \array_pad([$offset], $num, $offset);
        }

        // the length: non-integer entries fall back to $num
        if ($length === null) {
            $length = \array_fill(0, $num, 0);
        } elseif (\is_array($length) === true) {
            $length = \array_slice($length, 0, $num);
            foreach ($length as &$valueTmpV2) {
                $valueTmpV2 = \is_int($valueTmpV2) ? $valueTmpV2 : $num;
            }
            unset($valueTmpV2);
        } else {
            $length = \array_pad([$length], $num, $length);
        }

        // recursive call, element by element; the encoding has to travel along,
        // otherwise every element would silently be handled as UTF-8
        return \array_map(
            static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
            },
            $str,
            $replacement,
            $offset,
            $length
        );
    }

    // a replacement array for a single string: only its first entry is used
    if (\is_array($replacement) === true) {
        if (\count($replacement) > 0) {
            $replacement = $replacement[0];
        } else {
            $replacement = '';
        }
    }

    // init
    $str = (string) $str;
    $replacement = (string) $replacement;

    if (\is_array($length) === true) {
        throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
    }

    if (\is_array($offset) === true) {
        throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
    }

    if ($str === '') {
        return $replacement;
    }

    if (self::$SUPPORT['mbstring'] === true) {
        $string_length = (int) self::strlen($str, $encoding);

        // clamp the offset into [0, $string_length]
        if ($offset < 0) {
            $offset = (int) \max(0, $string_length + $offset);
        } elseif ($offset > $string_length) {
            $offset = $string_length;
        }

        // translate a negative / missing / oversized length into a plain count
        if ($length !== null && $length < 0) {
            $length = (int) \max(0, $string_length - $offset + $length);
        } elseif ($length === null || $length > $string_length) {
            $length = $string_length;
        }

        /** @noinspection AdditionOperationOnArraysInspection */
        if (($offset + $length) > $string_length) {
            $length = $string_length - $offset;
        }

        // head + replacement + tail
        /** @noinspection AdditionOperationOnArraysInspection */
        return ((string) \mb_substr($str, 0, $offset, $encoding)) .
               $replacement .
               ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($str)) {
        return ($length === null) ?
            \substr_replace($str, $replacement, $offset) :
            \substr_replace($str, $replacement, $offset, $length);
    }

    //
    // fallback via vanilla php
    //

    // split both strings into arrays of UTF-8 characters
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
        $lengthTmp = self::strlen($str, $encoding);
        if ($lengthTmp === false) {
            // e.g.: non mbstring support + invalid chars
            return '';
        }
        $length = (int) $lengthTmp;
    }

    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
}
10904
|
|
|
|
10905
|
|
|
/**
 * Removes a suffix ($needle) from the end of the string ($haystack).
 *
 * The suffix test itself is a plain byte comparison; only the final cut is
 * done in characters of the given encoding.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string $haystack without the trailing $needle, or $haystack unchanged
 *                if it is empty, $needle is empty, or the suffix does not match
 */
public static function substr_right(
    string $haystack,
    string $needle,
    string $encoding = 'UTF-8'
): string {
    // nothing to strip from, or nothing to strip
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    // byte-wise check: does $haystack end with $needle?
    $tail = \substr($haystack, -\strlen($needle));
    if ($tail !== $needle) {
        return $haystack;
    }

    if ($encoding === 'UTF-8') {
        $keepLength = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

        return (string) \mb_substr($haystack, 0, $keepLength);
    }

    $keepLength = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

    return (string) self::substr($haystack, 0, $keepLength, $encoding);
}
10946
|
|
|
|
10947
|
|
|
/**
 * Returns a case swapped version of the string.
 *
 * For UTF-8 input the fast path XORs the lower-cased, upper-cased and original
 * byte strings. That trick is only valid while all three have the same byte
 * length; when a case mapping changes the byte length (e.g. "ı" => "I") the
 * string is swapped character by character instead.
 *
 * NOTE(review): for other encodings the byte-wise XOR is still used
 * unconditionally, so the same length limitation applies there.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return string each character's case swapped
 */
public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
{
    if ($str === '') {
        return '';
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
        return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
    }

    $lower = \mb_strtolower($str);
    $upper = \mb_strtoupper($str);
    $byteLength = \strlen($str);

    // fast path: byte positions line up, so "lower ^ upper ^ original" flips the case
    if (\strlen($lower) === $byteLength && \strlen($upper) === $byteLength) {
        return (string) ($lower ^ $upper ^ $str);
    }

    $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
    if ($chars === false) {
        // not valid UTF-8, cannot be split into characters: keep the previous behavior
        return (string) ($lower ^ $upper ^ $str);
    }

    $swapped = '';
    foreach ($chars as $char) {
        $lowerChar = \mb_strtolower($char);

        // already lower case (or caseless) => use the upper-case form, otherwise lower it
        $swapped .= ($lowerChar === $char) ? \mb_strtoupper($char) : $lowerChar;
    }

    return $swapped;
}
10974
|
|
|
|
10975
|
|
|
/**
 * Checks whether symfony-polyfills are used.
 *
 * A polyfill is assumed when a function of an extension exists although the
 * extension itself is not loaded; this is checked for "mbstring" and "iconv".
 *
 * @return bool
 *              <strong>true</strong> if in use, <strong>false</strong> otherwise
 */
public static function symfony_polyfill_used(): bool
{
    $mbstringIsPolyfilled = \extension_loaded('mbstring') === false
                            && \function_exists('mb_strlen');

    $iconvIsPolyfilled = \extension_loaded('iconv') === false
                         && \function_exists('iconv');

    return $mbstringIsPolyfilled || $iconvIsPolyfilled;
}
10998
|
|
|
|
10999
|
|
|
/**
 * Replaces every tab character in the string with $tabLength spaces.
 *
 * @param string $str       <p>The input string.</p>
 * @param int    $tabLength [optional] <p>Number of spaces that replace one tab. Default: 4</p>
 *
 * @return string the string with all tabs expanded to spaces
 */
public static function tabs_to_spaces(string $str, int $tabLength = 4): string
{
    // always derive the padding from $tabLength instead of special-casing
    // hard-coded literals for 2 and 4, so every width goes through one code path
    return \str_replace("\t", \str_repeat(' ', $tabLength), $str);
}
11017
|
|
|
|
11018
|
|
|
/**
 * Converts the first character of each word in the string to uppercase
 * and all other chars to lowercase.
 *
 * Without a language and without $tryToKeepStringLength the work is done by
 * "mb_convert_case()"; otherwise it is delegated to "UTF8::str_titleize()".
 *
 * @param string      $str                   <p>The input string.</p>
 * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string string with all characters of $str being title-cased
 */
public static function titlecase(
    string $str,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    $needsSpecialHandling = $lang !== null || $tryToKeepStringLength !== false;

    if ($needsSpecialHandling) {
        // the string was already cleaned above (if requested), so cleaning is switched off here
        return self::str_titleize($str, null, $encoding, false, $lang, $tryToKeepStringLength, false);
    }

    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
    }

    return \mb_convert_case($str, \MB_CASE_TITLE);
}
11055
|
|
|
|
11056
|
|
|
/**
 * Deprecated alias for "UTF8::to_ascii()": all arguments are forwarded unchanged.
 *
 * @param string $str
 * @param string $subst_chr
 * @param bool   $strict
 *
 * @return string
 *
 * @see UTF8::to_ascii()
 * @deprecated <p>use "UTF8::to_ascii()"</p>
 */
public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
{
    $ascii = self::to_ascii($str, $subst_chr, $strict);

    return $ascii;
}
11072
|
|
|
|
11073
|
|
|
/**
 * Deprecated alias for "UTF8::to_iso8859()": the argument is forwarded unchanged.
 *
 * @param string|string[] $str
 *
 * @return string|string[]
 *
 * @see UTF8::to_iso8859()
 * @deprecated <p>use "UTF8::to_iso8859()"</p>
 */
public static function toIso8859($str)
{
    $converted = self::to_iso8859($str);

    return $converted;
}
11087
|
|
|
|
11088
|
|
|
/**
 * Deprecated alias for "UTF8::to_latin1()": the argument is forwarded unchanged.
 *
 * @param string|string[] $str
 *
 * @return string|string[]
 *
 * @see UTF8::to_latin1()
 * @deprecated <p>use "UTF8::to_latin1()"</p>
 */
public static function toLatin1($str)
{
    $converted = self::to_latin1($str);

    return $converted;
}
11102
|
|
|
|
11103
|
|
|
/**
 * Deprecated alias for "UTF8::to_utf8()": the argument is forwarded unchanged.
 *
 * @param string|string[] $str
 *
 * @return string|string[]
 *
 * @see UTF8::to_utf8()
 * @deprecated <p>use "UTF8::to_utf8()"</p>
 */
public static function toUTF8($str)
{
    $converted = self::to_utf8($str);

    return $converted;
}
11117
|
|
|
|
11118
|
|
|
/**
 * Convert a string into ASCII.
 *
 * Empty and already-ASCII strings are returned untouched. Everything else is passed through
 * "UTF8::clean()" first; characters that are still non-ASCII afterwards are replaced via the
 * transliteration tables that are loaded per "bank" (code point >> 8).
 *
 * @param string $str     <p>The input string.</p>
 * @param string $unknown [optional] <p>Character used if a character is unknown. (default is ?)</p>
 * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
 *                        performance</p>
 *
 * @return string
 */
public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
{
    // cache for the transliteration tables, indexed by bank
    static $UTF8_TO_ASCII;

    if ($str === '') {
        return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
        return $str;
    }

    $str = self::clean(
        $str,
        true,
        true,
        true,
        false,
        true,
        true
    );

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
        return $str;
    }

    if (
        $strict === true
        &&
        self::$SUPPORT['intl'] === true
    ) {
        // INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C
        /** @noinspection PhpComposerExtensionStubsInspection */
        $transliterated = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str);

        // "false" signals a failure: keep the cleaned string and fall back to the table lookup below
        // instead of silently replacing the whole input with an empty string
        if ($transliterated !== false) {
            $str = (string) $transliterated;

            // check again, if we only have ASCII, now ...
            if (self::is_ascii($str) === true) {
                return $str;
            }
        }
    }

    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    // split the string into single UTF-8 characters
    \preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0] ?? [];

    foreach ($chars as &$c) {
        $ordC0 = self::$ORD[$c[0]];

        // ASCII - next please
        if ($ordC0 <= 127) {
            continue;
        }

        // the code point must be determined for every character on its own,
        // otherwise the value of a previous character would be re-used
        $ord = null;
        $byteCount = \strlen($c);

        if ($ordC0 >= 192 && $ordC0 <= 223 && $byteCount >= 2) {
            // 2-byte sequence
            $ord = ($ordC0 - 192) * 64
                   + (self::$ORD[$c[1]] - 128);
        } elseif ($ordC0 >= 224 && $ordC0 <= 239 && $byteCount >= 3) {
            // 3-byte sequence
            $ord = ($ordC0 - 224) * 4096
                   + (self::$ORD[$c[1]] - 128) * 64
                   + (self::$ORD[$c[2]] - 128);
        } elseif ($ordC0 >= 240 && $ordC0 <= 247 && $byteCount >= 4) {
            // 4-byte sequence
            $ord = ($ordC0 - 240) * 262144
                   + (self::$ORD[$c[1]] - 128) * 4096
                   + (self::$ORD[$c[2]] - 128) * 64
                   + (self::$ORD[$c[3]] - 128);
        }

        // stray continuation bytes, truncated sequences and lead bytes >= 248
        // (no transliteration bank exists for such values) cannot be mapped
        if ($ord === null) {
            $c = $unknown;

            continue;
        }

        $bank = $ord >> 8;
        if (!isset($UTF8_TO_ASCII[$bank])) {
            $UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank));
            if ($UTF8_TO_ASCII[$bank] === false) {
                // remember missing banks too, so that they are not requested again
                $UTF8_TO_ASCII[$bank] = [];
            }
        }

        $newchar = $ord & 255;

        $c = $UTF8_TO_ASCII[$bank][$newchar] ?? $unknown;
    }
    // break the reference of the by-reference loop variable
    unset($c);

    return \implode('', $chars);
}
11279
|
|
|
|
11280
|
|
|
/**
 * Convert a (string-castable) value into a boolean.
 *
 * The words "true", "1", "on", "yes" and "false", "0", "off", "no" are recognised first (exact match,
 * then lower-cased), numeric strings are true when they are greater than zero and every other value
 * is true when something is left after "trim()".
 *
 * @param mixed $str
 *
 * @return bool
 */
public static function to_boolean($str): bool
{
    $value = (string) $str;

    if ($value === '') {
        return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $keywords = [
        'true'  => true,
        '1'     => true,
        'on'    => true,
        'yes'   => true,
        'false' => false,
        '0'     => false,
        'off'   => false,
        'no'    => false,
    ];

    // exact match first, lower-cased match second
    $known = $keywords[$value] ?? $keywords[\strtolower($value)] ?? null;
    if ($known !== null) {
        return $known;
    }

    return \is_numeric($value)
        ? ((float) $value + 0) > 0
        : (bool) \trim($value);
}
11321
|
|
|
|
11322
|
|
|
/**
 * Convert given string to safe filename (and keep string case).
 *
 * Only ".", "-", "a-z", "A-Z", "0-9" and the fallback character survive; whitespace is replaced by the
 * fallback character, repeated fallback characters are collapsed and the result is trimmed by it.
 *
 * @param string $string
 * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe
 *                                  characters are simply removed.</p>
 * @param string $fallback_char     <p>Replacement for whitespace. With an empty string, whitespace is
 *                                  removed instead of replaced.</p>
 *
 * @return string
 */
public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
{
    if ($use_transliterate === true) {
        $string = self::str_transliterate($string, $fallback_char);
    }

    if ($fallback_char === '') {
        // An empty fallback would produce the invalid pattern "/[]+/u" below, "preg_replace()" would
        // return null and the whole filename would be lost. Without a fallback character there is
        // nothing to collapse or to trim, so it is enough to drop unsafe characters and whitespace.
        return (string) \preg_replace('/[^\\.\\-a-zA-Z0-9]/', '', $string);
    }

    $fallback_char_escaped = \preg_quote($fallback_char, '/');

    $string = (string) \preg_replace(
        [
            '/[^' . $fallback_char_escaped . '\\.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars
            '/[\\s]+/u', // 2) convert spaces to $fallback_char
            '/[' . $fallback_char_escaped . ']+/u', // 3) remove double $fallback_char's
        ],
        [
            '',
            $fallback_char,
            $fallback_char,
        ],
        $string
    );

    // trim "$fallback_char" from beginning and end of the string
    return \trim($string, $fallback_char);
}
11357
|
|
|
|
11358
|
|
|
/**
 * Convert a string into "ISO-8859"-encoding (Latin-1).
 *
 * Arrays are converted element by element (recursively, keys are kept); every other value is cast
 * to a string and handed to "UTF8::utf8_decode()".
 *
 * @param string|string[] $str
 *
 * @return string|string[]
 */
public static function to_iso8859($str)
{
    if (\is_array($str) === true) {
        return \array_map(
            static function ($value) {
                return self::to_iso8859($value);
            },
            $str
        );
    }

    $string = (string) $str;

    return $string === '' ? '' : self::utf8_decode($string);
}
11382
|
|
|
|
11383
|
|
|
/**
 * Alias of "UTF8::to_iso8859()": the argument is forwarded unchanged and the result is returned as-is.
 *
 * @param string|string[] $str
 *
 * @return string|string[]
 *
 * @see UTF8::to_iso8859()
 */
public static function to_latin1($str)
{
    return self::to_iso8859($str);
}
11396
|
|
|
|
11397
|
|
|
/**
 * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
 *
 * <ul>
 * <li>It decodes unicode escape sequences ("\uXXXX", including surrogate pairs) and, on request,
 * html-entities.</li>
 * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
 * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()"
 * for this case.</li>
 * </ul>
 *
 * @param string|string[] $str                    <p>Any string or array.</p>
 * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
 *
 * @return string|string[] the UTF-8 encoded string
 */
public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
{
    // arrays are converted element by element (recursively, keys are kept)
    if (\is_array($str) === true) {
        return \array_map(
            static function ($value) use ($decodeHtmlEntityToUtf8) {
                return self::to_utf8($value, $decodeHtmlEntityToUtf8);
            },
            $str
        );
    }

    $input = (string) $str;
    if ($input === '') {
        return $input;
    }

    $length = \strlen($input);
    $buf = '';
    $pos = 0;

    while ($pos < $length) {
        $byte = $input[$pos];
        $code = \ord($byte);

        // plain ASCII: it doesn't need conversion
        if ($code < 0x80) {
            $buf .= $byte;
            ++$pos;

            continue;
        }

        // how many continuation bytes would a UTF-8 sequence with this lead byte have?
        if ($code >= 0xC0 && $code <= 0xDF) {
            $expected = 1; // looks like 2 bytes UTF8
        } elseif ($code >= 0xE0 && $code <= 0xEF) {
            $expected = 2; // looks like 3 bytes UTF8
        } elseif ($code >= 0xF0 && $code <= 0xF7) {
            $expected = 3; // looks like 4 bytes UTF8
        } else {
            $expected = 0; // doesn't look like UTF8 (0x80-0xBF, 0xF8-0xFF), but should be converted
        }

        // every expected continuation byte must exist and must be within 0x80-0xBF
        $looksLikeUtf8 = $expected > 0 && $pos + $expected < $length;
        for ($offset = 1; $looksLikeUtf8 && $offset <= $expected; ++$offset) {
            $next = \ord($input[$pos + $offset]);
            $looksLikeUtf8 = $next >= 0x80 && $next <= 0xBF;
        }

        if ($looksLikeUtf8) {
            // yeah, almost sure it's UTF8 already
            $buf .= \substr($input, $pos, $expected + 1);
            $pos += $expected + 1;
        } else {
            // not valid UTF8 - convert the single byte
            $buf .= self::to_utf8_convert_helper($byte);
            ++$pos;
        }
    }

    // decode unicode escape sequences + unicode surrogate pairs
    $buf = \preg_replace_callback(
        '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
        /**
         * @param array $matches
         *
         * @return string
         */
        static function (array $matches): string {
            if (isset($matches[3])) {
                // single "\uXXXX" escape
                $cp = (int) \hexdec($matches[3]);
            } else {
                // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                $high = (int) \hexdec($matches[1]);
                $low = (int) \hexdec($matches[2]);
                $cp = ($high << 10) + $low + 0x10000 - (0xD800 << 10) - 0xDC00;
            }

            // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
            //
            // php_utf32_utf8(unsigned char *buf, unsigned k)

            if ($cp < 0x80) {
                return (string) self::chr($cp);
            }

            if ($cp < 0xA0) {
                /** @noinspection UnnecessaryCastingInspection */
                return (string) self::chr(0xC0 | ($cp >> 6)) . (string) self::chr(0x80 | ($cp & 0x3F));
            }

            return self::decimal_to_chr($cp);
        },
        $buf
    );

    // "preg_replace_callback()" returns null on error
    if ($buf === null) {
        return '';
    }

    // decode UTF-8 codepoints
    if ($decodeHtmlEntityToUtf8 === true) {
        $buf = self::html_entity_decode($buf);
    }

    return $buf;
}
11530
|
|
|
|
11531
|
|
|
/**
 * Strip whitespace or other characters from beginning or end of a UTF-8 string.
 *
 * INFO: This is slower than "trim()"
 *
 * The native function would only be safe for 7-bit (ASCII) strings / chars, but checking for ASCII
 * first costs more time than it would save here.
 *
 * @param string      $str   <p>The string to be trimmed</p>
 * @param string|null $chars [optional] <p>Optional characters to be stripped; whitespace is stripped
 *                           when null or an empty string is given.</p>
 *
 * @return string the trimmed string
 */
public static function trim(string $str = '', string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    // compare explicitly: a plain truthiness check would treat the character list "0" as "not given"
    if ($chars !== null && $chars !== '') {
        $chars = \preg_quote($chars, '/');
        $pattern = "^[{$chars}]+|[{$chars}]+\$";
    } else {
        $pattern = '^[\\s]+|[\\s]+$';
    }

    if (self::$SUPPORT['mbstring'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    return self::regex_replace($str, $pattern, '', '', '/');
}
11564
|
|
|
|
11565
|
|
|
/**
 * Makes string's first char uppercase.
 *
 * The plain "mb_strtoupper()" is used as long as neither a language nor "$tryToKeepStringLength" is
 * requested; otherwise the first character is handed to "UTF8::strtoupper()".
 *
 * @param string      $str                   <p>The input string.</p>
 * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string the resulting string
 */
public static function ucfirst(
    string $str,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    if ($str === '') {
        return '';
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    // no special casing rules requested -> the native mb-function is good enough
    $plainMb = $lang === null && $tryToKeepStringLength === false;

    if ($encoding === 'UTF-8') {
        $firstChar = (string) \mb_substr($str, 0, 1);
        $rest = (string) \mb_substr($str, 1);

        $upperFirst = $plainMb === true
            ? \mb_strtoupper($firstChar)
            : self::strtoupper($firstChar, $encoding, false, $lang, $tryToKeepStringLength);

        return $upperFirst . $rest;
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');
    $rest = (string) self::substr($str, 1, null, $encoding);

    if ($plainMb === true) {
        return \mb_strtoupper((string) \mb_substr($str, 0, 1, $encoding), $encoding) . $rest;
    }

    return self::strtoupper(
        (string) self::substr($str, 0, 1, $encoding),
        $encoding,
        false,
        $lang,
        $tryToKeepStringLength
    ) . $rest;
}
11634
|
|
|
|
11635
|
|
|
/**
 * Alias of "UTF8::ucfirst()": the three arguments are forwarded unchanged.
 *
 * @param string $str
 * @param string $encoding
 * @param bool   $cleanUtf8
 *
 * @return string
 *
 * @see UTF8::ucfirst()
 */
public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
{
    return self::ucfirst($str, $encoding, $cleanUtf8);
}
11650
|
|
|
|
11651
|
|
|
/**
 * Uppercase for all words in the string.
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
 * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a
 *                             new word.</p>
 * @param string   $encoding   [optional] <p>Set the charset.</p>
 * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return string
 */
public static function ucwords(
    string $str,
    array $exceptions = [],
    string $charlist = '',
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false
): string {
    // compare explicitly: "!$str" would also swallow the input "0"
    if ($str === '') {
        return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    // the native function is only an option if neither a charlist nor exceptions are given;
    // compared against '' so that a charlist / an exception of "0" is not ignored
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
        return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;

    foreach ($words as &$word) {
        if (!$word) {
            continue;
        }

        if (
            $useExceptions === false
            ||
            !\in_array($word, $exceptions, true)
        ) {
            $word = self::ucfirst($word, $encoding);
        }
    }
    // break the reference of the by-reference loop variable
    unset($word);

    return \implode('', $words);
}
11712
|
|
|
|
11713
|
|
|
/**
 * Multi decode html entity & fix urlencoded-win1252-chars.
 *
 * e.g:
 * 'test+test'                     => 'test test'
 * 'D&uuml;sseldorf'               => 'Düsseldorf'
 * 'D%FCsseldorf'                  => 'Düsseldorf'
 * 'D&#xFC;sseldorf'               => 'Düsseldorf'
 * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
 * 'DÃ¼sseldorf'                   => 'Düsseldorf'
 * 'D%C3%BCsseldorf'               => 'Düsseldorf'
 * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
 * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
 *
 * @param string $str          <p>The input string.</p>
 * @param bool   $multi_decode <p>Decode as often as possible.</p>
 *
 * @return string
 */
public static function urldecode(string $str, bool $multi_decode = true): string
{
    if ($str === '') {
        return '';
    }

    // nothing that could be url-, entity- or unicode-escaped -> only the simple fix is needed
    $mayBeEncoded = \strpos($str, '&') !== false
                    || \strpos($str, '%') !== false
                    || \strpos($str, '+') !== false
                    || \strpos($str, '\u') !== false;
    if ($mayBeEncoded === false) {
        return self::fix_simple_utf8($str);
    }

    $decoded = self::urldecode_unicode_helper($str);

    // decode once, or (with $multi_decode) until a round doesn't change the string anymore
    do {
        $previous = $decoded;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $withoutEntities = self::html_entity_decode(
            self::to_utf8($decoded),
            \ENT_QUOTES | \ENT_HTML5
        );

        $decoded = self::fix_simple_utf8(\urldecode($withoutEntities));
    } while ($multi_decode === true && $previous !== $decoded);

    return $decoded;
}
11770
|
|
|
|
11771
|
|
|
/** |
11772
|
|
|
* Returns an array that maps "urlencoded" win1252 sequences ("%XX") to UTF-8 characters. |
11773
|
|
|
* |
11774
|
|
|
* @return string[] |
11775
|
|
|
* |
11776
|
|
|
* @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p> |
11777
|
|
|
*/ |
11778
|
2 |
|
public static function urldecode_fix_win1252_chars(): array |
11779
|
|
|
{ |
11780
|
|
|
return [ |
11781
|
2 |
|
'%20' => ' ', |
11782
|
|
|
'%21' => '!', |
11783
|
|
|
'%22' => '"', |
11784
|
|
|
'%23' => '#', |
11785
|
|
|
'%24' => '$', |
11786
|
|
|
'%25' => '%', |
11787
|
|
|
'%26' => '&', |
11788
|
|
|
'%27' => "'", |
11789
|
|
|
'%28' => '(', |
11790
|
|
|
'%29' => ')', |
11791
|
|
|
'%2A' => '*', |
11792
|
|
|
'%2B' => '+', |
11793
|
|
|
'%2C' => ',', |
11794
|
|
|
'%2D' => '-', |
11795
|
|
|
'%2E' => '.', |
11796
|
|
|
'%2F' => '/', |
11797
|
|
|
'%30' => '0', |
11798
|
|
|
'%31' => '1', |
11799
|
|
|
'%32' => '2', |
11800
|
|
|
'%33' => '3', |
11801
|
|
|
'%34' => '4', |
11802
|
|
|
'%35' => '5', |
11803
|
|
|
'%36' => '6', |
11804
|
|
|
'%37' => '7', |
11805
|
|
|
'%38' => '8', |
11806
|
|
|
'%39' => '9', |
11807
|
|
|
'%3A' => ':', |
11808
|
|
|
'%3B' => ';', |
11809
|
|
|
'%3C' => '<', |
11810
|
|
|
'%3D' => '=', |
11811
|
|
|
'%3E' => '>', |
11812
|
|
|
'%3F' => '?', |
11813
|
|
|
'%40' => '@', |
11814
|
|
|
'%41' => 'A', |
11815
|
|
|
'%42' => 'B', |
11816
|
|
|
'%43' => 'C', |
11817
|
|
|
'%44' => 'D', |
11818
|
|
|
'%45' => 'E', |
11819
|
|
|
'%46' => 'F', |
11820
|
|
|
'%47' => 'G', |
11821
|
|
|
'%48' => 'H', |
11822
|
|
|
'%49' => 'I', |
11823
|
|
|
'%4A' => 'J', |
11824
|
|
|
'%4B' => 'K', |
11825
|
|
|
'%4C' => 'L', |
11826
|
|
|
'%4D' => 'M', |
11827
|
|
|
'%4E' => 'N', |
11828
|
|
|
'%4F' => 'O', |
11829
|
|
|
'%50' => 'P', |
11830
|
|
|
'%51' => 'Q', |
11831
|
|
|
'%52' => 'R', |
11832
|
|
|
'%53' => 'S', |
11833
|
|
|
'%54' => 'T', |
11834
|
|
|
'%55' => 'U', |
11835
|
|
|
'%56' => 'V', |
11836
|
|
|
'%57' => 'W', |
11837
|
|
|
'%58' => 'X', |
11838
|
|
|
'%59' => 'Y', |
11839
|
|
|
'%5A' => 'Z', |
11840
|
|
|
'%5B' => '[', |
11841
|
|
|
'%5C' => '\\', |
11842
|
|
|
'%5D' => ']', |
11843
|
|
|
'%5E' => '^', |
11844
|
|
|
'%5F' => '_', |
11845
|
|
|
'%60' => '`', |
11846
|
|
|
'%61' => 'a', |
11847
|
|
|
'%62' => 'b', |
11848
|
|
|
'%63' => 'c', |
11849
|
|
|
'%64' => 'd', |
11850
|
|
|
'%65' => 'e', |
11851
|
|
|
'%66' => 'f', |
11852
|
|
|
'%67' => 'g', |
11853
|
|
|
'%68' => 'h', |
11854
|
|
|
'%69' => 'i', |
11855
|
|
|
'%6A' => 'j', |
11856
|
|
|
'%6B' => 'k', |
11857
|
|
|
'%6C' => 'l', |
11858
|
|
|
'%6D' => 'm', |
11859
|
|
|
'%6E' => 'n', |
11860
|
|
|
'%6F' => 'o', |
11861
|
|
|
'%70' => 'p', |
11862
|
|
|
'%71' => 'q', |
11863
|
|
|
'%72' => 'r', |
11864
|
|
|
'%73' => 's', |
11865
|
|
|
'%74' => 't', |
11866
|
|
|
'%75' => 'u', |
11867
|
|
|
'%76' => 'v', |
11868
|
|
|
'%77' => 'w', |
11869
|
|
|
'%78' => 'x', |
11870
|
|
|
'%79' => 'y', |
11871
|
|
|
'%7A' => 'z', |
11872
|
|
|
'%7B' => '{', |
11873
|
|
|
'%7C' => '|', |
11874
|
|
|
'%7D' => '}', |
11875
|
|
|
'%7E' => '~', |
11876
|
|
|
'%7F' => '', |
11877
|
|
|
'%80' => '`', |
11878
|
|
|
'%81' => '', |
11879
|
|
|
'%82' => '‚', |
11880
|
|
|
'%83' => 'ƒ', |
11881
|
|
|
'%84' => '„', |
11882
|
|
|
'%85' => '…', |
11883
|
|
|
'%86' => '†', |
11884
|
|
|
'%87' => '‡', |
11885
|
|
|
'%88' => 'ˆ', |
11886
|
|
|
'%89' => '‰', |
11887
|
|
|
'%8A' => 'Š', |
11888
|
|
|
'%8B' => '‹', |
11889
|
|
|
'%8C' => 'Œ', |
11890
|
|
|
'%8D' => '', |
11891
|
|
|
'%8E' => 'Ž', |
11892
|
|
|
'%8F' => '', |
11893
|
|
|
'%90' => '', |
11894
|
|
|
'%91' => '‘', |
11895
|
|
|
'%92' => '’', |
11896
|
|
|
'%93' => '“', |
11897
|
|
|
'%94' => '”', |
11898
|
|
|
'%95' => '•', |
11899
|
|
|
'%96' => '–', |
11900
|
|
|
'%97' => '—', |
11901
|
|
|
'%98' => '˜', |
11902
|
|
|
'%99' => '™', |
11903
|
|
|
'%9A' => 'š', |
11904
|
|
|
'%9B' => '›', |
11905
|
|
|
'%9C' => 'œ', |
11906
|
|
|
'%9D' => '', |
11907
|
|
|
'%9E' => 'ž', |
11908
|
|
|
'%9F' => 'Ÿ', |
11909
|
|
|
'%A0' => '', |
11910
|
|
|
'%A1' => '¡', |
11911
|
|
|
'%A2' => '¢', |
11912
|
|
|
'%A3' => '£', |
11913
|
|
|
'%A4' => '¤', |
11914
|
|
|
'%A5' => '¥', |
11915
|
|
|
'%A6' => '¦', |
11916
|
|
|
'%A7' => '§', |
11917
|
|
|
'%A8' => '¨', |
11918
|
|
|
'%A9' => '©', |
11919
|
|
|
'%AA' => 'ª', |
11920
|
|
|
'%AB' => '«', |
11921
|
|
|
'%AC' => '¬', |
11922
|
|
|
'%AD' => '', |
11923
|
|
|
'%AE' => '®', |
11924
|
|
|
'%AF' => '¯', |
11925
|
|
|
'%B0' => '°', |
11926
|
|
|
'%B1' => '±', |
11927
|
|
|
'%B2' => '²', |
11928
|
|
|
'%B3' => '³', |
11929
|
|
|
'%B4' => '´', |
11930
|
|
|
'%B5' => 'µ', |
11931
|
|
|
'%B6' => '¶', |
11932
|
|
|
'%B7' => '·', |
11933
|
|
|
'%B8' => '¸', |
11934
|
|
|
'%B9' => '¹', |
11935
|
|
|
'%BA' => 'º', |
11936
|
|
|
'%BB' => '»', |
11937
|
|
|
'%BC' => '¼', |
11938
|
|
|
'%BD' => '½', |
11939
|
|
|
'%BE' => '¾', |
11940
|
|
|
'%BF' => '¿', |
11941
|
|
|
'%C0' => 'À', |
11942
|
|
|
'%C1' => 'Á', |
11943
|
|
|
'%C2' => 'Â', |
11944
|
|
|
'%C3' => 'Ã', |
11945
|
|
|
'%C4' => 'Ä', |
11946
|
|
|
'%C5' => 'Å', |
11947
|
|
|
'%C6' => 'Æ', |
11948
|
|
|
'%C7' => 'Ç', |
11949
|
|
|
'%C8' => 'È', |
11950
|
|
|
'%C9' => 'É', |
11951
|
|
|
'%CA' => 'Ê', |
11952
|
|
|
'%CB' => 'Ë', |
11953
|
|
|
'%CC' => 'Ì', |
11954
|
|
|
'%CD' => 'Í', |
11955
|
|
|
'%CE' => 'Î', |
11956
|
|
|
'%CF' => 'Ï', |
11957
|
|
|
'%D0' => 'Ð', |
11958
|
|
|
'%D1' => 'Ñ', |
11959
|
|
|
'%D2' => 'Ò', |
11960
|
|
|
'%D3' => 'Ó', |
11961
|
|
|
'%D4' => 'Ô', |
11962
|
|
|
'%D5' => 'Õ', |
11963
|
|
|
'%D6' => 'Ö', |
11964
|
|
|
'%D7' => '×', |
11965
|
|
|
'%D8' => 'Ø', |
11966
|
|
|
'%D9' => 'Ù', |
11967
|
|
|
'%DA' => 'Ú', |
11968
|
|
|
'%DB' => 'Û', |
11969
|
|
|
'%DC' => 'Ü', |
11970
|
|
|
'%DD' => 'Ý', |
11971
|
|
|
'%DE' => 'Þ', |
11972
|
|
|
'%DF' => 'ß', |
11973
|
|
|
'%E0' => 'à', |
11974
|
|
|
'%E1' => 'á', |
11975
|
|
|
'%E2' => 'â', |
11976
|
|
|
'%E3' => 'ã', |
11977
|
|
|
'%E4' => 'ä', |
11978
|
|
|
'%E5' => 'å', |
11979
|
|
|
'%E6' => 'æ', |
11980
|
|
|
'%E7' => 'ç', |
11981
|
|
|
'%E8' => 'è', |
11982
|
|
|
'%E9' => 'é', |
11983
|
|
|
'%EA' => 'ê', |
11984
|
|
|
'%EB' => 'ë', |
11985
|
|
|
'%EC' => 'ì', |
11986
|
|
|
'%ED' => 'í', |
11987
|
|
|
'%EE' => 'î', |
11988
|
|
|
'%EF' => 'ï', |
11989
|
|
|
'%F0' => 'ð', |
11990
|
|
|
'%F1' => 'ñ', |
11991
|
|
|
'%F2' => 'ò', |
11992
|
|
|
'%F3' => 'ó', |
11993
|
|
|
'%F4' => 'ô', |
11994
|
|
|
'%F5' => 'õ', |
11995
|
|
|
'%F6' => 'ö', |
11996
|
|
|
'%F7' => '÷', |
11997
|
|
|
'%F8' => 'ø', |
11998
|
|
|
'%F9' => 'ù', |
11999
|
|
|
'%FA' => 'ú', |
12000
|
|
|
'%FB' => 'û', |
12001
|
|
|
'%FC' => 'ü', |
12002
|
|
|
'%FD' => 'ý', |
12003
|
|
|
'%FE' => 'þ', |
12004
|
|
|
'%FF' => 'ÿ', |
12005
|
|
|
]; |
12006
|
|
|
} |
12007
|
|
|
|
12008
|
|
|
/**
 * Decodes an UTF-8 string to ISO-8859-1.
 *
 * 2-byte sequences are folded into one byte (or "?" if the code point is above 255), 3- and 4-byte
 * sequences are replaced by "?" and all other bytes are copied unchanged.
 *
 * @param string $str           <p>The input string.</p>
 * @param bool   $keepUtf8Chars <p>Return the untouched input if the decoded result is not shorter
 *                              (measured with "UTF8::strlen()") than the input.</p>
 *
 * @return string
 */
public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
{
    if ($str === '') {
        return '';
    }

    // save for later comparison
    $str_backup = $str;
    $len = \strlen($str);

    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // The string is rewritten in place: $i is the read position, $j the write position.
    // $j never overtakes $i, so unread bytes are never overwritten.
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
        switch ($str[$i] & "\xF0") {
            case "\xC0":
            case "\xD0":
                if ($i + 1 >= $len) {
                    // truncated 2-byte sequence at the very end of the string:
                    // there is no second byte to read, so the character is unknown
                    $str[$j] = $noCharFound;

                    break;
                }

                $high = self::$ORD[$str[$i] & "\x1F"];
                $low = self::$ORD[$str[++$i] & "\x3F"];
                $c = ($high << 6) | $low;
                $str[$j] = $c < 256 ? self::$CHR[$c] : $noCharFound;

                break;

            /** @noinspection PhpMissingBreakStatementInspection */
            case "\xF0":
                // 4-byte sequence: skip one byte more than for a 3-byte sequence
                ++$i;

                // no break

            case "\xE0":
                $str[$j] = $noCharFound;
                $i += 2;

                break;

            default:
                $str[$j] = $str[$i];
        }
    }

    // cut off the no longer used tail of the in-place buffer
    $return = \substr($str, 0, $j);
    if ($return === false) {
        $return = '';
    }

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($return) >= (int) self::strlen($str_backup)
    ) {
        return $str_backup;
    }

    return $return;
}
12077
|
|
|
|
12078
|
|
|
/**
 * Encodes an ISO-8859-1 string to UTF-8.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string the encoded string, or an empty string if the encoding failed
 */
public static function utf8_encode(string $str): string
{
    if ($str === '') {
        return '';
    }

    $encoded = \utf8_encode($str);

    // the polyfill maybe return false
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    /** @psalm-suppress TypeDoesNotContainType */
    return $encoded === false ? '' : $encoded;
}
12102
|
|
|
|
12103
|
|
|
/**
 * Alias of "UTF8::fix_simple_utf8()": the string is forwarded unchanged.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string
 *
 * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
 */
public static function utf8_fix_win1252_chars(string $str): string
{
    return self::fix_simple_utf8($str);
}
12116
|
|
|
|
12117
|
|
|
/**
 * Returns the class-level table of UTF-8 whitespace characters.
 *
 * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
 *
 * @return string[]
 *                  <p>The whitespace table: whitespace characters as values, keyed by the
 *                  type of whitespace as defined in the URL above.</p>
 */
public static function whitespace_table(): array
{
    $table = self::$WHITESPACE_TABLE;

    return $table;
}
12130
|
|
|
|
12131
|
|
|
/**
 * Truncates a string after a maximum number of words.
 *
 * A "word" is a run of non-whitespace characters. If the string contains no more
 * than $limit words it is returned unchanged; otherwise the first $limit words are
 * kept, trailing whitespace is trimmed and $strAddOn is appended.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $limit    <p>The maximum number of words to keep.</p>
 * @param string $strAddOn <p>Text appended in place of the stripped part.</p>
 *
 * @return string
 *                <p>An empty string if the input is empty or $limit is smaller than 1.</p>
 */
public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
{
    if ($limit < 1 || $str === '') {
        return '';
    }

    // Optional leading whitespace followed by 1..$limit "word + trailing whitespace" groups.
    $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
    \preg_match($pattern, $str, $found);

    // No match at all: hand the input back untouched.
    if (!isset($found[0])) {
        return $str;
    }

    $head = $found[0];

    // The match already covers the whole string, so nothing was cut off.
    if (\mb_strlen($str) === (int) \mb_strlen($head)) {
        return $str;
    }

    return \rtrim($head) . $strAddOn;
}
12158
|
|
|
|
12159
|
|
|
/**
 * Wraps a string to a given number of characters (multi-byte aware).
 *
 * The input is translated into a single-byte "mask" ('?' per non-space character,
 * ' ' per space, '#' per existing break) so that PHP's native "wordwrap()" can
 * compute the break positions; those positions are then mapped back onto the
 * original characters.
 *
 * @see http://php.net/manual/en/function.wordwrap.php
 *
 * @param string $str   <p>The input string.</p>
 * @param int    $width [optional] <p>The column width.</p>
 * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
 * @param bool   $cut   [optional] <p>
 *                      If the cut is set to true, the string is
 *                      always wrapped at or before the specified width. So if you have
 *                      a word that is larger than the given width, it is broken apart.
 *                      </p>
 *
 * @return string
 *                <p>The given string wrapped at the specified column; an empty string
 *                if $str or $break is empty.</p>
 */
public static function wordwrap(
    string $str,
    int $width = 75,
    string $break = "\n",
    bool $cut = false
): string {
    if ($str === '' || $break === '') {
        return '';
    }

    $segments = \explode($break, $str);
    if ($segments === false) {
        return '';
    }

    // $chars holds the real characters (each existing break is one element),
    // $mask holds one placeholder byte per element of $chars.
    $chars = [];
    $mask = '';
    foreach ($segments as $segmentIndex => $segment) {
        if ($segmentIndex !== 0) {
            $chars[] = $break;
            $mask .= '#';
        }

        foreach (self::str_split($segment) as $char) {
            $chars[] = $char;
            $mask .= $char === ' ' ? ' ' : '?';
        }
    }

    // Let the native implementation decide where the breaks ('#') go.
    $mask = \wordwrap($mask, $width, '#', $cut);
    $maskLength = \mb_strlen($mask);

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;
    while (($breakPos = \mb_strpos($mask, '#', $breakPos + 1)) !== false) {
        // Copy every character located before the current break marker.
        ++$maskIndex;
        while ($maskIndex < $breakPos) {
            $wrapped .= $chars[$charIndex];
            unset($chars[$charIndex]);
            ++$charIndex;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($maskIndex > $maskLength) {
                break 2;
            }

            ++$maskIndex;
        }

        // The marker stands for an original break or a replaced space: consume that
        // element. A marker inserted by "$cut" consumes nothing.
        if (
            $break === $chars[$charIndex]
            ||
            $chars[$charIndex] === ' '
        ) {
            unset($chars[$charIndex]);
            ++$charIndex;
        }

        $wrapped .= $break;

        // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
        if ($breakPos > $maskLength) {
            break;
        }
    }

    // Whatever was not consumed belongs to the last line.
    return $wrapped . \implode('', $chars);
}
12245
|
|
|
|
12246
|
|
|
/**
 * Splits the string by "$delimiter" (line breaks by default) and word-wraps every
 * part on its own via "UTF8::wordwrap()", then joins the parts again.
 *
 * @param string      $str           <p>The input string.</p>
 * @param int         $width         [optional] <p>The column width.</p>
 * @param string      $break         [optional] <p>The line is broken using the optional break parameter.</p>
 * @param bool        $cut           [optional] <p>
 *                                   If the cut is set to true, the string is
 *                                   always wrapped at or before the specified width. So if you have
 *                                   a word that is larger than the given width, it is broken apart.
 *                                   </p>
 * @param bool        $addFinalBreak [optional] <p>
 *                                   If this flag is true, then the method will add a $break at the end
 *                                   of the result string.
 *                                   </p>
 * @param string|null $delimiter     [optional] <p>
 *                                   The separator used for splitting and re-joining. With null the
 *                                   string is split on "\r\n", "\r" or "\n" and joined with "\n".
 *                                   </p>
 *
 * @return string
 */
public static function wordwrap_per_line(
    string $str,
    int $width = 75,
    string $break = "\n",
    bool $cut = false,
    bool $addFinalBreak = true,
    string $delimiter = null
): string {
    $parts = $delimiter === null
        ? \preg_split('/\\r\\n|\\r|\\n/', $str)
        : \explode($delimiter, $str);

    $wrappedParts = [];
    // Both split functions may report failure with false; then nothing is wrapped.
    if ($parts !== false) {
        foreach ($parts as $part) {
            $wrappedParts[] = self::wordwrap($part, $width, $break, $cut);
        }
    }

    $glue = $delimiter ?? "\n";
    $suffix = $addFinalBreak ? $break : '';

    return \implode($glue, $wrappedParts) . $suffix;
}
12297
|
|
|
|
12298
|
|
|
/**
 * Returns the class-level list of Unicode white space characters.
 *
 * @return string[]
 *                  <p>An array with numeric code point as key and White Space Character as value.</p>
 */
public static function ws(): array
{
    $whitespace = self::$WHITESPACE;

    return $whitespace;
}
12307
|
|
|
|
12308
|
|
|
/**
 * Applies the case-folding replacement tables to a string.
 *
 * The common table ("upper" / "lower" lists) is always applied; the full table
 * (loaded lazily from the "caseFolding_full" data file) only on request.
 *
 * @param string $str
 * @param bool   $useLower     <p>Use uppercase by default, otherwise use lowercase.</p>
 * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
 *
 * @return string
 */
private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
{
    $toLower = $useLower === true;

    $commonUpper = self::$COMMON_CASE_FOLD['upper'];
    $commonLower = self::$COMMON_CASE_FOLD['lower'];

    // Direction of the replacement depends on the requested target case.
    $str = $toLower
        ? \str_replace($commonUpper, $commonLower, $str)
        : \str_replace($commonLower, $commonUpper, $str);

    if ($fullCaseFold) {
        // Loaded once per request and kept for all later calls.
        static $FULL_CASE_FOLD = null;
        $FULL_CASE_FOLD = $FULL_CASE_FOLD ?? self::getData('caseFolding_full');

        $search = $toLower ? $FULL_CASE_FOLD[0] : $FULL_CASE_FOLD[1];
        $replace = $toLower ? $FULL_CASE_FOLD[1] : $FULL_CASE_FOLD[0];

        $str = \str_replace($search, $replace, $str);
    }

    return $str;
}
12349
|
|
|
|
12350
|
|
|
/**
 * Loads a data file from "/data/*.php" and returns the value that file returns.
 *
 * @param string $file <p>The file name without directory and without the ".php" extension.</p>
 *
 * @return array
 */
private static function getData(string $file): array
{
    $path = __DIR__ . '/data/' . $file . '.php';

    /** @noinspection PhpIncludeInspection */
    /** @noinspection UsingInclusionReturnValueInspection */
    /** @psalm-suppress UnresolvableInclude */
    return include $path;
}
12364
|
|
|
|
12365
|
|
|
/**
 * Loads a data file from "/data/*.php" if it exists.
 *
 * @param string $file <p>The file name without directory and without the ".php" extension.</p>
 *
 * @return false|mixed
 *                     <p>The value returned by the data file, or false if the file does not exist.</p>
 */
private static function getDataIfExists(string $file)
{
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
        return false;
    }

    /** @noinspection PhpIncludeInspection */
    /** @noinspection UsingInclusionReturnValueInspection */
    return include $path;
}
12383
|
|
|
|
12384
|
|
|
/**
 * Builds the emoji lookup caches on first use.
 *
 * Loads the "emoji" data file if necessary, orders it by key length (longest keys
 * first) and fills the key, value and reversible-placeholder caches.
 *
 * @return true|null
 *                   <p>true if the caches were built by this call, null if they already existed.</p>
 */
private static function initEmojiData()
{
    // Already initialised by an earlier call.
    if (self::$EMOJI_KEYS_CACHE !== null) {
        return null;
    }

    if (self::$EMOJI === null) {
        self::$EMOJI = self::getData('emoji');
    }

    // Descending by byte length of the key.
    \uksort(
        self::$EMOJI,
        static function (string $left, string $right): int {
            return \strlen($right) <=> \strlen($left);
        }
    );

    self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
    self::$EMOJI_VALUES_CACHE = \array_values(self::$EMOJI);

    // One placeholder per key: the CRC32 of the key followed by its reversed digits.
    foreach (self::$EMOJI_KEYS_CACHE as $emojiKey) {
        $checksum = (string) \crc32($emojiKey);
        self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev($checksum) . '_-_8FTU_ELBATROP_-_';
    }

    return true;
}
12414
|
|
|
|
12415
|
|
|
/**
 * Checks whether mbstring function overloading of the string functions is active.
 *
 * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2.
 *
 * @return bool
 *              <p>false if the "MB_OVERLOAD_STRING" constant is not defined or the
 *              corresponding bit is not set in "mbstring.func_overload".</p>
 */
private static function mbstring_overloaded(): bool
{
    /** @noinspection PhpComposerExtensionStubsInspection */
    if (!\defined('MB_OVERLOAD_STRING')) {
        return false;
    }

    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $overloadFlags = (int) @\ini_get('mbstring.func_overload');

    /** @noinspection PhpComposerExtensionStubsInspection */
    return (bool) ($overloadFlags & \MB_OVERLOAD_STRING);
}
12432
|
|
|
|
12433
|
|
|
/**
 * Filters a list of strings and returns the remaining values re-indexed from 0.
 *
 * @param array    $strings           <p>The strings to filter.</p>
 * @param bool     $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
 * @param int|null $removeShortValues <p>
 *                                    Drop values whose character length is less than or equal
 *                                    to this number; null disables the length filter.
 *                                    </p>
 *
 * @return array
 */
private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
{
    // init
    $return = [];

    // Iterate by value: nothing is modified in place, so a reference would only force
    // a copy of the array and leave a dangling reference behind.
    foreach ($strings as $str) {
        if (
            $removeShortValues !== null
            &&
            \mb_strlen($str) <= $removeShortValues
        ) {
            continue;
        }

        if (
            $removeEmptyValues === true
            &&
            \trim($str) === ''
        ) {
            continue;
        }

        $return[] = $str;
    }

    return $return;
}
12467
|
|
|
|
12468
|
|
|
/**
 * Builds a regex fragment that matches any of the characters of $s.
 *
 * Single characters are collected into one bracket expression (a '-' is moved to the
 * front so it is taken literally); elements that are not a single character become
 * alternatives of a non-capturing group. Results are memoized per ($s, $class) pair.
 *
 * @param string $s     <p>The characters that should be matched.</p>
 * @param string $class <p>Initial content of the bracket expression, e.g. a range such as "a-z".</p>
 *
 * @return string
 *                <p>Either "[...]" or "(?:[...]|alt|...)".</p>
 */
private static function rxClass(string $s, string $class = ''): string
{
    static $RX_CLASS_CACHE = [];

    // Two-level cache: concatenating both arguments into one key would let different
    // argument pairs (e.g. ("xa-z", "") and ("x", "a-z")) share one entry.
    if (isset($RX_CLASS_CACHE[$class][$s])) {
        return $RX_CLASS_CACHE[$class][$s];
    }

    $classArray = [$class];

    // Iterate by value: the split result is a temporary, and the parameter must not be
    // overwritten while it is still needed for the cache.
    foreach (self::str_split($s) as $char) {
        if ($char === '-') {
            // A leading '-' inside a bracket expression is a literal dash.
            $classArray[0] = '-' . $classArray[0];
        } elseif (!isset($char[2])) {
            // At most two bytes long: quote it for use inside the pattern.
            $classArray[0] .= \preg_quote($char, '/');
        } elseif (self::strlen($char) === 1) {
            $classArray[0] .= $char;
        } else {
            $classArray[] = $char;
        }
    }

    // Strict comparison, so that a class consisting of "0" is bracketed as well.
    if ($classArray[0] !== '') {
        $classArray[0] = '[' . $classArray[0] . ']';
    }

    if (\count($classArray) === 1) {
        $return = $classArray[0];
    } else {
        $return = '(?:' . \implode('|', $classArray) . ')';
    }

    $RX_CLASS_CACHE[$class][$s] = $return;

    return $return;
}
12516
|
|
|
|
12517
|
|
|
/**
 * Capitalizes the parts of a personal name, e.g. "marcus aurelius" => "Marcus Aurelius".
 *
 * The string is split by $delimiter and every part gets an upper-case first character,
 * except for parts that
 * - are exactly one of the listed name particles ("von", "de", ...), or
 * - start with one of the listed prefixes ("mc", "d'", ...), or
 * - start with one of the listed name particles when the delimiter is "-".
 *
 * @param string $names     <p>The name(s) to capitalize.</p>
 * @param string $delimiter <p>The separator between the name parts.</p>
 * @param string $encoding  <p>The encoding used for the prefix lookup.</p>
 *
 * @return string
 *                <p>The capitalized name; an empty string if the input could not be split.</p>
 */
private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
{
    // init
    $namesArray = \explode($delimiter, $names);

    if ($namesArray === false) {
        return '';
    }

    $specialCases = [
        'names' => [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ],
        'prefixes' => [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ],
    ];

    // With "-" as delimiter the name particles count as protected beginnings, too.
    $protectedBeginnings = $specialCases['prefixes'];
    if ($delimiter === '-') {
        $protectedBeginnings = \array_merge($specialCases['names'], $protectedBeginnings);
    }

    // Write back through the index instead of iterating by reference, so no
    // reference outlives the loop.
    foreach ($namesArray as $index => $name) {
        if (\in_array($name, $specialCases['names'], true)) {
            continue;
        }

        foreach ($protectedBeginnings as $beginning) {
            if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                // The first hit decides; no need to test the remaining beginnings.
                continue 2;
            }
        }

        $namesArray[$index] = self::ucfirst($name);
    }

    return \implode($delimiter, $namesArray);
}
12607
|
|
|
|
12608
|
|
|
/**
 * Generic case sensitive transformation for collation matching: the string is
 * decomposed (NFD) and all non-spacing marks (\p{Mn}) are removed.
 *
 * @param string $str <p>The input string</p>
 *
 * @return string|null
 *                     <p>The transformed string; an empty string if the input could not be
 *                     normalized; null if the regex replacement fails.</p>
 */
private static function strtonatfold(string $str)
{
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // "Normalizer::normalize()" reports failure (e.g. invalid UTF-8) with false; handing
    // that to "preg_replace()" would raise a TypeError under strict_types.
    if ($decomposed === false) {
        return '';
    }

    return \preg_replace(
        '/\p{Mn}+/u',
        '',
        $decomposed
    );
}
12623
|
|
|
|
12624
|
|
|
/**
 * Converts a single input value to UTF-8, using the Windows-1252 special-case table
 * where possible and a generic two-byte encoding otherwise.
 *
 * The "ord", "chr" and "win1252_to_utf8" data tables are loaded on first use.
 *
 * @param int|string $input <p>The value to convert; it is used as key of the "ord" table.</p>
 *
 * @return string
 */
private static function to_utf8_convert_helper($input): string
{
    // init
    $buf = '';

    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
        $buf .= self::$WIN1252_TO_UTF8[$ordC1];
    } else {
        // The division may yield a float; cast explicitly, because a fractional float
        // as array offset is deprecated since PHP 8.1 (it was silently truncated before).
        /** @noinspection OffsetOperationsInspection */
        $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
        // Lower six bits of the input, marked as UTF-8 continuation byte.
        $cc2 = ((string) $input & "\x3F") | "\x80";
        $buf .= $cc1 . $cc2;
    }

    return $buf;
}
12658
|
|
|
|
12659
|
|
|
/**
 * Rewrites non-standard "%uXXXX" escapes (3 or 4 hex digits) as hexadecimal
 * HTML entities ("&#xXXXX;").
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string
 *                <p>The input with every "%uXXXX" sequence replaced; unchanged if there is none.</p>
 */
private static function urldecode_unicode_helper(string $str): string
{
    $pattern = '/%u([0-9a-fA-F]{3,4})/';

    // Nothing to replace: return the input as it is.
    if (!\preg_match($pattern, $str)) {
        return $str;
    }

    return (string) \preg_replace($pattern, '&#x\\1;', $str);
}
12673
|
|
|
} |
12674
|
|
|
|
Let's assume that you have the following `foreach` statement, in which `$itemValue` is assigned by reference:

    foreach ($array as &$itemValue) { }

This is possible because the expression (in the example `$array`) can be used as a reference target.

However, if we were to replace `$array` with something different, like the result of a function call as in

    foreach (getArray() as &$itemValue) { }

then assigning by reference is not possible anymore as there is no target that could be modified.
Available Fixes
1. Do not assign by reference
2. Assign to a local variable first
3. Return a reference