1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace voku\helper; |
6
|
|
|
|
7
|
|
|
final class UTF8 |
8
|
|
|
{ |
9
|
|
|
// (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control]) |
10
|
|
|
// This regular expression is a work around for http://bugs.exim.org/1279 |
11
|
|
|
const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])"; |
12
|
|
|
|
13
|
|
|
/**
 * Byte order marks (BOM) mapped to their length in bytes.
 *
 * Every BOM is listed twice: once as its raw byte sequence and once as the
 * byte sequence that results when the raw BOM was read as "WINDOWS-1252" and
 * then stored as UTF-8 (each byte >= 0x80 then occupies two bytes).
 *
 * All keys are written as explicit byte escapes so the table does not depend
 * on invisible or non-ASCII literal characters surviving editors and tooling.
 * NOTE(review): the UTF-32 "WINDOWS-1252" keys assume the two zero bytes of
 * the BOM stay NUL bytes - confirm against the callers of this table.
 *
 * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
 *
 * @var array
 */
private static $BOM = [
    "\xef\xbb\xbf"             => 3, // UTF-8 BOM
    "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char may have more than one byte ...)
    "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
    "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
    "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
    "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
    "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
    "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
    "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
    "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
];
32
|
|
|
|
33
|
|
|
/** |
34
|
|
|
* Numeric code point => UTF-8 Character |
35
|
|
|
* |
36
|
|
|
* url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp |
37
|
|
|
* |
38
|
|
|
* @var array |
39
|
|
|
*/ |
40
|
|
|
private static $WHITESPACE = [ |
41
|
|
|
// NUL Byte |
42
|
|
|
0 => "\x0", |
43
|
|
|
// Tab |
44
|
|
|
9 => "\x9", |
45
|
|
|
// New Line |
46
|
|
|
10 => "\xa", |
47
|
|
|
// Vertical Tab |
48
|
|
|
11 => "\xb", |
49
|
|
|
// Carriage Return |
50
|
|
|
13 => "\xd", |
51
|
|
|
// Ordinary Space |
52
|
|
|
32 => "\x20", |
53
|
|
|
// NO-BREAK SPACE |
54
|
|
|
160 => "\xc2\xa0", |
55
|
|
|
// OGHAM SPACE MARK |
56
|
|
|
5760 => "\xe1\x9a\x80", |
57
|
|
|
// MONGOLIAN VOWEL SEPARATOR |
58
|
|
|
6158 => "\xe1\xa0\x8e", |
59
|
|
|
// EN QUAD |
60
|
|
|
8192 => "\xe2\x80\x80", |
61
|
|
|
// EM QUAD |
62
|
|
|
8193 => "\xe2\x80\x81", |
63
|
|
|
// EN SPACE |
64
|
|
|
8194 => "\xe2\x80\x82", |
65
|
|
|
// EM SPACE |
66
|
|
|
8195 => "\xe2\x80\x83", |
67
|
|
|
// THREE-PER-EM SPACE |
68
|
|
|
8196 => "\xe2\x80\x84", |
69
|
|
|
// FOUR-PER-EM SPACE |
70
|
|
|
8197 => "\xe2\x80\x85", |
71
|
|
|
// SIX-PER-EM SPACE |
72
|
|
|
8198 => "\xe2\x80\x86", |
73
|
|
|
// FIGURE SPACE |
74
|
|
|
8199 => "\xe2\x80\x87", |
75
|
|
|
// PUNCTUATION SPACE |
76
|
|
|
8200 => "\xe2\x80\x88", |
77
|
|
|
// THIN SPACE |
78
|
|
|
8201 => "\xe2\x80\x89", |
79
|
|
|
//HAIR SPACE |
80
|
|
|
8202 => "\xe2\x80\x8a", |
81
|
|
|
// LINE SEPARATOR |
82
|
|
|
8232 => "\xe2\x80\xa8", |
83
|
|
|
// PARAGRAPH SEPARATOR |
84
|
|
|
8233 => "\xe2\x80\xa9", |
85
|
|
|
// NARROW NO-BREAK SPACE |
86
|
|
|
8239 => "\xe2\x80\xaf", |
87
|
|
|
// MEDIUM MATHEMATICAL SPACE |
88
|
|
|
8287 => "\xe2\x81\x9f", |
89
|
|
|
// IDEOGRAPHIC SPACE |
90
|
|
|
12288 => "\xe3\x80\x80", |
91
|
|
|
]; |
92
|
|
|
|
93
|
|
|
/** |
94
|
|
|
* @var array |
95
|
|
|
*/ |
96
|
|
|
private static $WHITESPACE_TABLE = [ |
97
|
|
|
'SPACE' => "\x20", |
98
|
|
|
'NO-BREAK SPACE' => "\xc2\xa0", |
99
|
|
|
'OGHAM SPACE MARK' => "\xe1\x9a\x80", |
100
|
|
|
'EN QUAD' => "\xe2\x80\x80", |
101
|
|
|
'EM QUAD' => "\xe2\x80\x81", |
102
|
|
|
'EN SPACE' => "\xe2\x80\x82", |
103
|
|
|
'EM SPACE' => "\xe2\x80\x83", |
104
|
|
|
'THREE-PER-EM SPACE' => "\xe2\x80\x84", |
105
|
|
|
'FOUR-PER-EM SPACE' => "\xe2\x80\x85", |
106
|
|
|
'SIX-PER-EM SPACE' => "\xe2\x80\x86", |
107
|
|
|
'FIGURE SPACE' => "\xe2\x80\x87", |
108
|
|
|
'PUNCTUATION SPACE' => "\xe2\x80\x88", |
109
|
|
|
'THIN SPACE' => "\xe2\x80\x89", |
110
|
|
|
'HAIR SPACE' => "\xe2\x80\x8a", |
111
|
|
|
'LINE SEPARATOR' => "\xe2\x80\xa8", |
112
|
|
|
'PARAGRAPH SEPARATOR' => "\xe2\x80\xa9", |
113
|
|
|
'ZERO WIDTH SPACE' => "\xe2\x80\x8b", |
114
|
|
|
'NARROW NO-BREAK SPACE' => "\xe2\x80\xaf", |
115
|
|
|
'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f", |
116
|
|
|
'IDEOGRAPHIC SPACE' => "\xe3\x80\x80", |
117
|
|
|
]; |
118
|
|
|
|
119
|
|
|
/** |
120
|
|
|
* bidirectional text chars |
121
|
|
|
* |
122
|
|
|
* url: https://www.w3.org/International/questions/qa-bidi-unicode-controls |
123
|
|
|
* |
124
|
|
|
* @var array |
125
|
|
|
*/ |
126
|
|
|
private static $BIDI_UNI_CODE_CONTROLS_TABLE = [ |
127
|
|
|
// LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr") |
128
|
|
|
8234 => "\xE2\x80\xAA", |
129
|
|
|
// RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl") |
130
|
|
|
8235 => "\xE2\x80\xAB", |
131
|
|
|
// POP DIRECTIONAL FORMATTING // (use -> </bdo>) |
132
|
|
|
8236 => "\xE2\x80\xAC", |
133
|
|
|
// LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">) |
134
|
|
|
8237 => "\xE2\x80\xAD", |
135
|
|
|
// RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">) |
136
|
|
|
8238 => "\xE2\x80\xAE", |
137
|
|
|
// LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr") |
138
|
|
|
8294 => "\xE2\x81\xA6", |
139
|
|
|
// RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl") |
140
|
|
|
8295 => "\xE2\x81\xA7", |
141
|
|
|
// FIRST STRONG ISOLATE // (use -> dir = "auto") |
142
|
|
|
8296 => "\xE2\x81\xA8", |
143
|
|
|
// POP DIRECTIONAL ISOLATE |
144
|
|
|
8297 => "\xE2\x81\xA9", |
145
|
|
|
]; |
146
|
|
|
|
147
|
|
|
/** |
148
|
|
|
* @var array |
149
|
|
|
*/ |
150
|
|
|
private static $COMMON_CASE_FOLD = [ |
151
|
|
|
'upper' => [ |
152
|
|
|
'µ', |
153
|
|
|
'ſ', |
154
|
|
|
"\xCD\x85", |
155
|
|
|
'ς', |
156
|
|
|
'ẞ', |
157
|
|
|
"\xCF\x90", |
158
|
|
|
"\xCF\x91", |
159
|
|
|
"\xCF\x95", |
160
|
|
|
"\xCF\x96", |
161
|
|
|
"\xCF\xB0", |
162
|
|
|
"\xCF\xB1", |
163
|
|
|
"\xCF\xB5", |
164
|
|
|
"\xE1\xBA\x9B", |
165
|
|
|
"\xE1\xBE\xBE", |
166
|
|
|
], |
167
|
|
|
'lower' => [ |
168
|
|
|
'μ', |
169
|
|
|
's', |
170
|
|
|
'ι', |
171
|
|
|
'σ', |
172
|
|
|
'ß', |
173
|
|
|
'β', |
174
|
|
|
'θ', |
175
|
|
|
'φ', |
176
|
|
|
'π', |
177
|
|
|
'κ', |
178
|
|
|
'ρ', |
179
|
|
|
'ε', |
180
|
|
|
"\xE1\xB9\xA1", |
181
|
|
|
'ι', |
182
|
|
|
], |
183
|
|
|
]; |
184
|
|
|
|
185
|
|
|
/** |
186
|
|
|
* @var array |
187
|
|
|
*/ |
188
|
|
|
private static $SUPPORT = []; |
189
|
|
|
|
190
|
|
|
/** |
191
|
|
|
* @var array|null |
192
|
|
|
*/ |
193
|
|
|
private static $UTF8_MSWORD; |
194
|
|
|
|
195
|
|
|
/** |
196
|
|
|
* @var array|null |
197
|
|
|
*/ |
198
|
|
|
private static $BROKEN_UTF8_FIX; |
199
|
|
|
|
200
|
|
|
/** |
201
|
|
|
* @var array|null |
202
|
|
|
*/ |
203
|
|
|
private static $WIN1252_TO_UTF8; |
204
|
|
|
|
205
|
|
|
/** |
206
|
|
|
* @var array|null |
207
|
|
|
*/ |
208
|
|
|
private static $ENCODINGS; |
209
|
|
|
|
210
|
|
|
/** |
211
|
|
|
* @var array|null |
212
|
|
|
*/ |
213
|
|
|
private static $ORD; |
214
|
|
|
|
215
|
|
|
/** |
216
|
|
|
* @var array|null |
217
|
|
|
*/ |
218
|
|
|
private static $CHR; |
219
|
|
|
|
220
|
|
|
/**
 * Creates an instance and triggers the environment feature detection
 * performed by UTF8::checkForSupport().
 */
public function __construct()
{
    self::checkForSupport();
}
227
|
|
|
|
228
|
|
|
/**
 * Returns the character at the given position: a multi-byte aware
 * counterpart of the $str[1] syntax.
 *
 * @param string $str <p>A UTF-8 string.</p>
 * @param int    $pos <p>The position of character to return.</p>
 *
 * @return string single multi-byte character; an empty string for an empty
 *                input or a negative position
 */
public static function access(string $str, int $pos): string
{
    // nothing to look up in an empty string or at a negative position
    if ($str === '' || $pos < 0) {
        return '';
    }

    $char = self::substr($str, $pos, 1);

    return (string) $char;
}
248
|
|
|
|
249
|
|
|
/**
 * Makes sure the string starts with the UTF-8 BOM.
 *
 * INFO: A string for which UTF8::string_has_bom() already reports a BOM is
 *       returned unchanged.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string the output string that contains BOM
 */
public static function add_bom_to_string(string $str): string
{
    if (self::string_has_bom($str) !== false) {
        return $str;
    }

    return self::bom() . $str;
}
266
|
|
|
|
267
|
|
|
/**
 * Adds the specified amount of left and right padding to the given string.
 * The default character used is a space.
 *
 * The padding itself is delegated to UTF8::str_pad(): the target length is
 * the current length plus both padding amounts, and the pad type is
 * STR_PAD_LEFT / STR_PAD_RIGHT when only one side was requested, otherwise
 * STR_PAD_BOTH.
 *
 * @param string $str
 * @param int    $left     [optional] <p>Length of left padding. Default: 0</p>
 * @param int    $right    [optional] <p>Length of right padding. Default: 0</p>
 * @param string $padStr   [optional] <p>String used to pad. Default: ' '</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return string string with padding applied
 */
private static function apply_padding(string $str, int $left = 0, int $right = 0, string $padStr = ' ', string $encoding = 'UTF-8'): string
{
    // $encoding carries a default so that no required parameter follows
    // optional ones (such a signature is deprecated since PHP 8.0)
    $strlen = self::strlen($str, $encoding);

    if ($left && !$right) {
        $type = \STR_PAD_LEFT;
    } elseif ($right && !$left) {
        $type = \STR_PAD_RIGHT;
    } else {
        // both sides requested, or no padding at all
        $type = \STR_PAD_BOTH;
    }

    // a side that was not requested contributes 0 to the target length
    $length = ($left + $right) + $strlen;

    return self::str_pad($str, $length, $padStr, $type, $encoding);
}
299
|
|
|
|
300
|
|
|
/**
 * Changes the case of all keys in an array.
 *
 * Keys are converted with UTF8::strtolower() or UTF8::strtoupper(); the
 * values are copied over unchanged. Any $case other than CASE_UPPER is
 * handled as CASE_LOWER.
 *
 * @param array $array <p>The array to work on</p>
 * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
 *                     or <strong>CASE_LOWER</strong> (default)</p>
 *
 * @return array an array with its keys lower or uppercased
 */
public static function array_change_key_case(array $array, int $case = \CASE_LOWER): array
{
    // decide once, outside of the loop, which conversion is wanted
    $toUpper = ($case === \CASE_UPPER);

    $converted = [];
    foreach ($array as $originalKey => $value) {
        $newKey = $toUpper
            ? self::strtoupper($originalKey)
            : self::strtolower($originalKey);

        $converted[$newKey] = $value;
    }

    return $converted;
}
332
|
|
|
|
333
|
|
|
/**
 * Returns the text enclosed by the $start and $end delimiters, or an empty
 * string when a delimiter is missing or nothing lies between them. The
 * search for $start begins at $offset.
 *
 * @param string $str
 * @param string $start    <p>Delimiter marking the start of the substring.</p>
 * @param string $end      <p>Delimiter marking the end of the substring.</p>
 * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 */
public static function between(string $str, string $start, string $end, int $offset = 0, string $encoding = 'UTF-8'): string
{
    $startPos = self::strpos($str, $start, $offset, $encoding);
    if ($startPos === false) {
        return '';
    }

    // the wanted content begins right behind the start delimiter
    $contentStart = $startPos + self::strlen($start, $encoding);

    $endPos = self::strpos($str, $end, $contentStart, $encoding);
    if ($endPos === false || $endPos === $contentStart) {
        return '';
    }

    $content = self::substr($str, $contentStart, $endPos - $contentStart, $encoding);

    return $content === false ? '' : $content;
}
371
|
|
|
|
372
|
|
|
/**
 * Convert binary into an string.
 *
 * The binary digits are converted to hexadecimal via base_convert() and the
 * hex digits are then packed into bytes. Leading zero bits are not kept by
 * base_convert(), therefore the hex string is padded back to whole bytes
 * before packing (e.g. "1001" => "\x09").
 *
 * INFO: base_convert() may lose precision on large numbers, so long inputs
 *       are not converted reliably.
 *
 * @param mixed $bin 1|0
 *
 * @return string the decoded bytes; an empty string for empty input, for
 *                input without a first offset and for a value of zero
 */
public static function binary_to_str($bin): string
{
    if (!isset($bin[0])) {
        return '';
    }

    $hex = \base_convert($bin, 2, 16);
    if ($hex === '0') {
        return '';
    }

    // pack('H*') pads an odd number of hex digits on the right
    // ("9" => 0x90), so restore the dropped leading zero nibble first
    if (\strlen($hex) % 2 !== 0) {
        $hex = '0' . $hex;
    }

    return \pack('H*', $hex);
}
392
|
|
|
|
393
|
|
|
/**
 * Returns the UTF-8 Byte Order Mark Character.
 *
 * INFO: take a look at UTF8::$bom for e.g. UTF-16 and UTF-32 BOM values
 *
 * @return string UTF-8 Byte Order Mark (the three bytes EF BB BF)
 */
public static function bom(): string
{
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
}
404
|
|
|
|
405
|
|
|
/**
 * @alias of UTF8::chr_map()
 *
 * Forwards both arguments unchanged to UTF8::chr_map().
 *
 * @see UTF8::chr_map()
 *
 * @param array|string $callback
 * @param string       $str
 *
 * @return string[]
 */
public static function callback($callback, string $str): array
{
    $mapped = self::chr_map($callback, $str);

    return $mapped;
}
419
|
|
|
|
420
|
|
|
/**
 * Returns the character at $index, with indexes starting at 0.
 *
 * The lookup is a one character long UTF8::substr() call whose result is
 * cast to string.
 *
 * @param string $str
 * @param int    $index    <p>Position of the character.</p>
 * @param string $encoding [optional] <p>Default is UTF-8</p>
 *
 * @return string the character at $index
 */
public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
{
    $char = self::substr($str, $index, 1, $encoding);

    return (string) $char;
}
433
|
|
|
|
434
|
|
|
/**
 * Returns an array consisting of the characters in the string.
 *
 * The string is split via UTF8::str_split() with a chunk length of 1.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string[] an array of chars
 */
public static function chars(string $str): array
{
    $chars = self::str_split($str, 1);

    return $chars;
}
445
|
|
|
|
446
|
|
|
/**
 * This method will auto-detect your server environment for UTF-8 support.
 *
 * The detection results are stored in self::$SUPPORT. The detection runs
 * only once: later calls return immediately.
 *
 * INFO: You don't need to run it manually, it will be triggered if it's needed.
 */
public static function checkForSupport()
{
    // already detected by an earlier call
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the transliterator ids stay an empty list without a usable "intl"
    self::$SUPPORT['intl__transliterator_list_ids'] = [];
    if (self::$SUPPORT['intl'] === true && \function_exists('transliterator_list_ids')) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        self::$SUPPORT['intl__transliterator_list_ids'] = \transliterator_list_ids();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
}
494
|
|
|
|
495
|
|
|
/**
 * Generates a UTF-8 encoded character from the given code point.
 *
 * Code points up to 127 are read from the lazily loaded self::$CHR table.
 * Higher code points are created by \IntlChar::chr() when "intlChar" support
 * was detected, otherwise the UTF-8 bytes are assembled from self::$CHR.
 * For every encoding other than UTF-8 the character is passed through
 * UTF8::encode(). Results are memoized per code point and encoding.
 *
 * INFO: opposite to UTF8::ord()
 *
 * @param int|string $code_point <p>The code point for which to generate a character.</p>
 * @param string     $encoding   [optional] <p>Default is UTF-8</p>
 *
 * @return string|null multi-byte character, returns null on failure or empty input
 */
public static function chr($code_point, string $encoding = 'UTF-8')
{
    // memo: "<code point><encoding>" => generated character
    static $CHAR_CACHE = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // every other encoding is only announced as unsupported without mbstring
    $isExoticEncoding = $encoding !== 'UTF-8'
                        && $encoding !== 'ISO-8859-1'
                        && $encoding !== 'WINDOWS-1252';
    if ($isExoticEncoding && self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey])) {
        return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 127) {
        // use "simple"-char only until "\x80"
        if (self::$CHR === null) {
            $chrTmp = self::getData('chr');
            if ($chrTmp) {
                self::$CHR = (array) $chrTmp;
            }
        }

        $chr = self::$CHR[$code_point];
    } elseif (self::$SUPPORT['intlChar'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $chr = \IntlChar::chr($code_point);
    } else {
        // fallback: assemble the UTF-8 byte sequence by hand
        if (self::$CHR === null) {
            $chrTmp = self::getData('chr');
            if ($chrTmp) {
                self::$CHR = (array) $chrTmp;
            }
        }

        $code_point = (int) $code_point;

        if ($code_point <= 0x7F) {
            // one byte
            $chr = self::$CHR[$code_point];
        } elseif ($code_point <= 0x7FF) {
            // two bytes: lead byte + one continuation byte
            $chr = self::$CHR[($code_point >> 6) + 0xC0]
                   . self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // three bytes: lead byte + two continuation bytes
            $chr = self::$CHR[($code_point >> 12) + 0xE0]
                   . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                   . self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // four bytes: lead byte + three continuation bytes
            $chr = self::$CHR[($code_point >> 18) + 0xF0]
                   . self::$CHR[(($code_point >> 12) & 0x3F) + 0x80]
                   . self::$CHR[(($code_point >> 6) & 0x3F) + 0x80]
                   . self::$CHR[($code_point & 0x3F) + 0x80];
        }
    }

    if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
    }

    return $CHAR_CACHE[$cacheKey] = $chr;
}
594
|
|
|
|
595
|
|
|
/**
 * Applies callback to all characters of a string.
 *
 * The string is split via UTF8::split() and the callback is run on every
 * resulting element with array_map().
 *
 * @param array|string $callback <p>The callback function.</p>
 * @param string       $str      <p>UTF-8 string to run callback on.</p>
 *
 * @return string[] the outcome of callback
 */
public static function chr_map($callback, string $str): array
{
    return \array_map($callback, self::split($str));
}
609
|
|
|
|
610
|
|
|
/**
 * Generates an array of byte length of each character of a Unicode string.
 *
 * 1 byte => U+0000  - U+007F
 * 2 byte => U+0080  - U+07FF
 * 3 byte => U+0800  - U+FFFF
 * 4 byte => U+10000 - U+10FFFF
 *
 * The characters come from UTF8::split(). Their size is measured with
 * UTF8::strlen_in_byte() when "mbstring_func_overload" was detected,
 * otherwise with the native strlen().
 *
 * @param string $str <p>The original unicode string.</p>
 *
 * @return int[] an array of byte lengths of each character
 */
public static function chr_size_list(string $str): array
{
    if ($str === '') {
        return [];
    }

    $chars = self::split($str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $byteLength = function ($data) {
            return self::strlen_in_byte($data);
        };
    } else {
        $byteLength = '\strlen';
    }

    return \array_map($byteLength, $chars);
}
645
|
|
|
|
646
|
|
|
/**
 * Get a decimal code representation of a specific character.
 *
 * The first byte decides how many bytes belong to the character; its marker
 * bits are masked out and the payload bits of the following bytes are
 * appended, six bits per byte.
 *
 * @param string $char <p>The input character.</p>
 *
 * @return int
 */
public static function chr_to_decimal(string $char): int
{
    $code = self::ord($char[0]);

    // 0xxxxxxx
    if (($code & 0x80) === 0) {
        return $code;
    }

    // a first byte matching none of the patterns below is returned as it is
    $bytes = 1;

    if (($code & 0xe0) === 0xc0) {
        // 110xxxxx
        $bytes = 2;
        $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
        // 1110xxxx
        $bytes = 3;
        $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
        // 11110xxx
        $bytes = 4;
        $code &= ~0xf0;
    }

    for ($offset = 1; $offset < $bytes; ++$offset) {
        // 10xxxxxx
        $code = ($code << 6) + (self::ord($char[$offset]) & ~0x80);
    }

    return $code;
}
684
|
|
|
|
685
|
|
|
/**
 * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
 *
 * The code point comes from UTF8::ord() and is formatted by
 * UTF8::int_to_hex().
 *
 * @param int|string $char <p>The input character</p>
 * @param string     $pfix [optional]
 *
 * @return string The code point encoded as U+xxxx, or an empty string for
 *                an empty input
 */
public static function chr_to_hex($char, string $pfix = 'U+'): string
{
    if ($char === '') {
        return '';
    }

    // NOTE(review): the literal below is reproduced exactly as found here;
    // confirm against the repository that tooling has not altered it.
    $input = ($char === '�') ? '' : $char;

    return self::int_to_hex(self::ord($input), $pfix);
}
705
|
|
|
|
706
|
|
|
/**
 * alias for "UTF8::chr_to_decimal()"
 *
 * Forwards the argument unchanged to UTF8::chr_to_decimal().
 *
 * @see UTF8::chr_to_decimal()
 *
 * @param string $chr
 *
 * @return int
 */
public static function chr_to_int(string $chr): int
{
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
}
719
|
|
|
|
720
|
|
|
/**
 * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
 *
 * The chunks come from UTF8::split() and are joined with $end, so $end is
 * placed between the chunks.
 *
 * @param string $body     <p>The original string to be split.</p>
 * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
 * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
 *
 * @return string the chunked string
 */
public static function chunk_split(string $body, int $chunklen = 76, string $end = "\r\n"): string
{
    $chunks = self::split($body, $chunklen);

    return \implode($end, $chunks);
}
733
|
|
|
|
734
|
|
|
/**
 * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
 *
 * The byte level clean-up always runs. The optional steps run afterwards in
 * this order: diamond question mark, invisible characters, whitespace,
 * MS Word characters, BOM.
 *
 * @param string $str                           <p>The string to be sanitized.</p>
 * @param bool   $remove_bom                    [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
 * @param bool   $normalize_whitespace          [optional] <p>Set to true, if you need to normalize the
 *                                              whitespace.</p>
 * @param bool   $normalize_msword              [optional] <p>Set to true, if you need to normalize MS Word chars
 *                                              e.g.: "…" => "..."</p>
 * @param bool   $keep_non_breaking_space       [optional] <p>Set to true, to keep non-breaking-spaces, in
 *                                              combination with $normalize_whitespace</p>
 * @param bool   $replace_diamond_question_mark [optional] <p>Set to true, if you need to remove diamond question
 *                                              mark e.g.: "�"</p>
 * @param bool   $remove_invisible_characters   [optional] <p>Set to false, if you not want to remove invisible
 *                                              characters e.g.: "\0"</p>
 *
 * @return string clean UTF-8 encoded string
 */
public static function clean(
    string $str,
    bool $remove_bom = false,
    bool $normalize_whitespace = false,
    bool $normalize_msword = false,
    bool $keep_non_breaking_space = false,
    bool $replace_diamond_question_mark = false,
    bool $remove_invisible_characters = true
): string {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 holds runs of well-formed sequences; stray bytes are matched
    // by the other groups and vanish, because only group 1 is written back
    $wellFormedOnly = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = (string) \preg_replace($wellFormedOnly, '$1', $str);

    if ($replace_diamond_question_mark) {
        $cleaned = self::replace_diamond_question_mark($cleaned, '');
    }

    if ($remove_invisible_characters) {
        $cleaned = self::remove_invisible_characters($cleaned);
    }

    if ($normalize_whitespace) {
        $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
        $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom) {
        $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
}
801
|
|
|
|
802
|
|
|
/**
 * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
 *
 * The input is cast to string, repaired via UTF8::fix_simple_utf8() and
 * then handed to UTF8::clean() with a fixed set of options.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string
 */
public static function cleanup($str): string
{
    $input = (string) $str;
    if ($input === '') {
        return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($input);

    return self::clean(
        $fixed,
        true,  // remove BOM
        true,  // normalize whitespace chars ...
        false, // no MS Word normalization
        true,  // ... but keep non-breaking-spaces
        true,  // remove diamond question mark (�)
        true   // remove invisible characters (e.g. "\0")
    );
}
836
|
|
|
|
837
|
|
|
/**
 * Accepts a string or a array of strings and returns an array of Unicode code points.
 *
 * A string is first split via UTF8::split(); every element is then mapped
 * through UTF8::ord() and, for $u_style, through UTF8::int_to_hex().
 *
 * INFO: opposite to UTF8::string()
 *
 * @param string|string[] $arg     <p>A UTF-8 encoded string or an array of such strings.</p>
 * @param bool            $u_style <p>If True, will return code points in U+xxxx format,
 *                                 default, code points will be returned as integers.</p>
 *
 * @return array<int|string>
 *                           The array of code points:<br>
 *                           array<int> for $u_style === false<br>
 *                           array<string> for $u_style === true<br>
 */
public static function codepoints($arg, bool $u_style = false): array
{
    $chars = \is_string($arg) ? self::split($arg) : $arg;

    $codePoints = \array_map([self::class, 'ord'], $chars);
    if ($codePoints === []) {
        return [];
    }

    if (!$u_style) {
        return $codePoints;
    }

    return \array_map([self::class, 'int_to_hex'], $codePoints);
}
881
|
|
|
|
882
|
|
|
/**
 * Trims the string and replaces consecutive whitespace characters with a
 * single space. This includes tabs and newline characters, as well as
 * multibyte whitespace such as the thin space and ideographic space.
 *
 * The replacement is done by UTF8::regex_replace() with the pattern
 * "[[:space:]]+", the trimming by UTF8::trim().
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string string with a trimmed $str and condensed whitespace
 */
public static function collapse_whitespace(string $str): string
{
    $condensed = self::regex_replace($str, '[[:space:]]+', ' ');

    return self::trim($condensed);
}
897
|
|
|
|
898
|
|
|
/**
 * Counts how often each character occurs in a string.
 *
 * The string is split into chunks of length 1 via self::split() and the
 * chunks are tallied with array_count_values().
 *
 * @param string $str       <p>The input string.</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string
 *                          (forwarded to self::split()).</p>
 *
 * @return int[] an associative array with the characters as keys and
 *               their number of occurrences as values
 */
public static function count_chars(string $str, bool $cleanUtf8 = false): array
{
    $chars = self::split($str, 1, $cleanUtf8);

    return \array_count_values($chars);
}
911
|
|
|
|
912
|
|
|
/**
 * Removes CSS "@media" blocks from a string.
 *
 * @param string $str <p>The CSS source.</p>
 *
 * @return string the input without the matched media-query blocks
 *                (an empty string if preg_replace() reports an error)
 */
public static function css_stripe_media_queries(string $str): string
{
    // Flags: m = multiline, i = case-insensitive, s = dot matches newline, U = ungreedy.
    $mediaQueryPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU';

    $stripped = \preg_replace($mediaQueryPattern, '', $str);

    return (string) $stripped;
}
927
|
|
|
|
928
|
|
|
/**
 * Reports whether the "ctype" extension is loaded.
 *
 * @return bool
 *              <strong>true</strong> if available, <strong>false</strong> otherwise
 */
public static function ctype_loaded(): bool
{
    $isLoaded = \extension_loaded('ctype');

    return $isLoaded;
}
938
|
|
|
|
939
|
|
|
/**
 * Converts a decimal code point into the corresponding UTF-8 character.
 *
 * The value is wrapped into a numeric HTML entity ("&#<int>;") which is then
 * decoded via self::html_entity_decode().
 *
 * @param mixed $int <p>The decimal code point.</p>
 *
 * @return string
 */
public static function decimal_to_chr($int): string
{
    $numericEntity = '&#' . $int . ';';
    $decodeFlags = \ENT_QUOTES | \ENT_HTML5;

    return self::html_entity_decode($numericEntity, $decodeFlags);
}
950
|
|
|
|
951
|
|
|
/**
 * Decodes a MIME header field.
 *
 * Uses iconv_mime_decode() when iconv support was detected, otherwise falls
 * back to mb_decode_mimeheader().
 *
 * @param string $str      <p>The encoded header field.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return false|string
 *                      A decoded MIME field on success,
 *                      or false if an error occurs during the decoding
 */
public static function decode_mimeheader($str, $encoding = 'UTF-8')
{
    // 'UTF-8' and 'CP850' are used verbatim; anything else is normalized first.
    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['iconv'] !== true) {
        // Fallback path without iconv.
        if ($encoding !== 'UTF-8') {
            $str = self::encode($encoding, $str);
        }

        return \mb_decode_mimeheader($str);
    }

    return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
}
981
|
|
|
|
982
|
|
|
/**
 * Encode a string with a new charset-encoding.
 *
 * INFO: The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
 *       encoding, so you can call it on a string that is already UTF-8 without messing it up.
 *
 * Besides real charsets, the pseudo encodings 'JSON', 'BASE64' and 'HTML-ENTITIES' are handled
 * for both $toEncoding and $fromEncoding.
 *
 * @param string $toEncoding             <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
 * @param string $str                    <p>The input string</p>
 * @param bool   $autodetectFromEncoding [optional] <p>If true, the current encoding of $str is detected
 *                                       via self::str_detect_encoding() and a detected value replaces
 *                                       $fromEncoding; if nothing is detected the string is passed
 *                                       through self::to_utf8() and returned.</p>
 * @param string $fromEncoding           [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
 *                                       An empty string will trigger the autodetect anyway.</p>
 *
 * @return string the converted string; the (possibly pre-decoded) input is returned when no
 *                conversion is needed or the conversion failed
 */
public static function encode(string $toEncoding, string $str, bool $autodetectFromEncoding = true, string $fromEncoding = ''): string
{
    if ($str === '' || $toEncoding === '') {
        return $str;
    }

    if ($toEncoding !== 'UTF-8' && $toEncoding !== 'CP850') {
        $toEncoding = self::normalize_encoding($toEncoding, 'UTF-8');
    }

    if ($fromEncoding && $fromEncoding !== 'UTF-8' && $fromEncoding !== 'CP850') {
        $fromEncoding = self::normalize_encoding($fromEncoding, null);
    }

    if ($toEncoding && $fromEncoding && $fromEncoding === $toEncoding) {
        return $str;
    }

    // self::$SUPPORT is read further down, so make sure it has been populated.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if ($toEncoding === 'JSON') {
        return self::json_encode($str);
    }
    if ($fromEncoding === 'JSON') {
        // NOTE(review): assumes self::json_decode() yields a string here - confirm against its definition.
        $str = self::json_decode($str);
        $fromEncoding = '';
    }

    if ($toEncoding === 'BASE64') {
        return \base64_encode($str);
    }
    if ($fromEncoding === 'BASE64') {
        // Strict decoding returns false for input that is not valid base64. Keep the
        // original string in that case instead of carrying "false" through a method
        // that is declared to return a string.
        $base64Decoded = \base64_decode($str, true);
        if ($base64Decoded !== false) {
            $str = $base64Decoded;
        }
        $fromEncoding = '';
    }

    if ($toEncoding === 'HTML-ENTITIES') {
        return self::html_encode($str, true, 'UTF-8');
    }
    if ($fromEncoding === 'HTML-ENTITIES') {
        $str = self::html_decode($str, \ENT_COMPAT, 'UTF-8');
        $fromEncoding = '';
    }

    $fromEncodingDetected = false;
    if (
        $autodetectFromEncoding === true
        ||
        !$fromEncoding
    ) {
        $fromEncodingDetected = self::str_detect_encoding($str);
    }

    // DEBUG
    //var_dump($toEncoding, $fromEncoding, $fromEncodingDetected, $str, "\n\n");

    if ($fromEncodingDetected !== false) {
        $fromEncoding = $fromEncodingDetected;
    } elseif ($autodetectFromEncoding === true) {
        // fallback for the "autodetect"-mode
        return self::to_utf8($str);
    }

    if (
        !$fromEncoding
        ||
        $fromEncoding === $toEncoding
    ) {
        return $str;
    }

    if (
        $toEncoding === 'UTF-8'
        &&
        (
            $fromEncoding === 'WINDOWS-1252'
            ||
            $fromEncoding === 'ISO-8859-1'
        )
    ) {
        return self::to_utf8($str);
    }

    if (
        $toEncoding === 'ISO-8859-1'
        &&
        (
            $fromEncoding === 'WINDOWS-1252'
            ||
            $fromEncoding === 'UTF-8'
        )
    ) {
        return self::to_iso8859($str);
    }

    if (
        $toEncoding !== 'UTF-8'
        &&
        $toEncoding !== 'ISO-8859-1'
        &&
        $toEncoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        \trigger_error('UTF8::encode() without mbstring cannot handle "' . $toEncoding . '" encoding', \E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
        // info: do not use the symfony polyfill here
        $strEncoded = \mb_convert_encoding(
            $str,
            $toEncoding,
            $fromEncoding
        );

        // Explicit comparison: a plain truthiness test would reject the valid result "0".
        if ($strEncoded !== false && $strEncoded !== '') {
            return $strEncoded;
        }
    }

    $return = \iconv($fromEncoding, $toEncoding, $str);
    if ($return !== false) {
        return $return;
    }

    return $str;
}
1131
|
|
|
|
1132
|
|
|
/**
 * Encodes a string as a MIME header field value via iconv_mime_encode().
 *
 * The field name passed to iconv_mime_encode() is always an empty string.
 *
 * @param string $str              <p>The header value to encode.</p>
 * @param string $fromCharset      [optional] <p>Set the input charset.</p>
 * @param string $toCharset        [optional] <p>Set the output charset.</p>
 * @param string $transferEncoding [optional] <p>Set the transfer encoding.</p>
 * @param string $linefeed         [optional] <p>Set the used linefeed.</p>
 * @param int    $indent           [optional] <p>Set the max length indent.</p>
 *
 * @return false|string
 *                      An encoded MIME field on success,
 *                      or false if an error occurs during the encoding
 */
public static function encode_mimeheader(
    $str,
    $fromCharset = 'UTF-8',
    $toCharset = 'UTF-8',
    $transferEncoding = 'Q',
    $linefeed = "\r\n",
    $indent = 76
) {
    // 'UTF-8' and 'CP850' are used verbatim; other charset names are normalized first.
    $inputIsKnown = $fromCharset === 'UTF-8' || $fromCharset === 'CP850';
    if (!$inputIsKnown) {
        $fromCharset = self::normalize_encoding($fromCharset, 'UTF-8');
    }

    $outputIsKnown = $toCharset === 'UTF-8' || $toCharset === 'CP850';
    if (!$outputIsKnown) {
        $toCharset = self::normalize_encoding($toCharset, 'UTF-8');
    }

    $preferences = [
        'scheme'           => $transferEncoding,
        'line-length'      => $indent,
        'input-charset'    => $fromCharset,
        'output-charset'   => $toCharset,
        'line-break-chars' => $linefeed,
    ];

    return \iconv_mime_encode('', $str, $preferences);
}
1172
|
|
|
|
1173
|
|
|
/**
 * Create an extract from a sentence; if the search-string is found, the extract tries to center it.
 *
 * Without a search string the text is cut at a word / sentence boundary located at or after
 * $length - 1 and $replacerForSkippedText is appended. With a search string the window is
 * positioned around the first case-insensitive match and $replacerForSkippedText marks the
 * skipped parts.
 *
 * NOTE(review): the boundary is computed as min()/max() over the positions of ' ' and '.'.
 * Presumably self::strpos() returns false when a needle is missing; in that case min() yields
 * false (cast to 0) and the text is not cut at that side. This behavior is kept as-is.
 *
 * @param string   $str                    <p>The input string.</p>
 * @param string   $search                 <p>The searched string.</p>
 * @param int|null $length                 [optional] <p>Default: null === text->length / 2</p>
 * @param string   $replacerForSkippedText [optional] <p>Default: …</p>
 * @param string   $encoding               [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 */
public static function extract_text(string $str, string $search = '', int $length = null, string $replacerForSkippedText = '…', string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    // Characters stripped from the cut edges of the extract.
    $trimChars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

    if ($length === null) {
        $length = (int) \round(self::strlen($str, $encoding) / 2, 0);
    }

    // Strict comparison: empty() would also treat the search term "0" as "no search".
    if ($search === '') {
        $stringLength = self::strlen($str, $encoding);

        if ($length > 0) {
            $end = ($length - 1) > $stringLength ? $stringLength : ($length - 1);
        } else {
            $end = 0;
        }

        $pos = (int) \min(
            self::strpos($str, ' ', $end, $encoding),
            self::strpos($str, '.', $end, $encoding)
        );

        if ($pos) {
            $strSub = self::substr($str, 0, $pos, $encoding);
            if ($strSub === false) {
                return '';
            }

            return \rtrim($strSub, $trimChars) . $replacerForSkippedText;
        }

        return $str;
    }

    $wordPos = self::stripos($str, $search, 0, $encoding);
    $halfSide = (int) ($wordPos - $length / 2 + self::strlen($search, $encoding) / 2);

    // Start of the window: last ' ' or '.' before the middle of the match region.
    $pos_start = 0;
    if ($halfSide > 0) {
        $halfText = self::substr($str, 0, $halfSide, $encoding);
        if ($halfText !== false) {
            $pos_start = (int) \max(
                self::strrpos($halfText, ' ', 0, $encoding),
                self::strrpos($halfText, '.', 0, $encoding)
            );
        }
    }

    if ($wordPos && $halfSide > 0) {
        $l = $pos_start + $length - 1;
        $realLength = self::strlen($str, $encoding);

        if ($l > $realLength) {
            $l = $realLength;
        }

        $pos_end = (int) \min(
            self::strpos($str, ' ', $l, $encoding),
            self::strpos($str, '.', $l, $encoding)
        ) - $pos_start;

        if ($pos_end <= 0) {
            // No usable end boundary: take everything from $pos_start on. The length is
            // measured in $encoding so it agrees with the substr() call.
            $strSub = self::substr($str, $pos_start, $realLength, $encoding);
            if ($strSub !== false) {
                $extract = $replacerForSkippedText . \ltrim($strSub, $trimChars);
            } else {
                $extract = '';
            }
        } else {
            $strSub = self::substr($str, $pos_start, $pos_end, $encoding);
            if ($strSub !== false) {
                $extract = $replacerForSkippedText . \trim($strSub, $trimChars) . $replacerForSkippedText;
            } else {
                $extract = '';
            }
        }
    } else {
        // Match is missing, at offset 0, or too close to the start: extract from the beginning.
        $l = $length - 1;
        $trueLength = self::strlen($str, $encoding);

        if ($l > $trueLength) {
            $l = $trueLength;
        }

        $pos_end = \min(
            self::strpos($str, ' ', $l, $encoding),
            self::strpos($str, '.', $l, $encoding)
        );

        if ($pos_end) {
            $strSub = self::substr($str, 0, $pos_end, $encoding);
            if ($strSub !== false) {
                $extract = \rtrim($strSub, $trimChars) . $replacerForSkippedText;
            } else {
                $extract = '';
            }
        } else {
            $extract = $str;
        }
    }

    return $extract;
}
1291
|
|
|
|
1292
|
|
|
/**
 * Reads entire file into a string.
 *
 * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
 *
 * NOTE(review): $filename is passed through filter_var(..., FILTER_SANITIZE_STRING) before
 * it is used. Per the PHP manual that filter strips tags and encodes quotes and is
 * deprecated as of PHP 8.1 - confirm whether file names containing such characters must
 * be supported.
 *
 * @see http://php.net/manual/en/function.file-get-contents.php
 *
 * @param string        $filename         <p>
 *                                        Name of the file to read.
 *                                        </p>
 * @param bool          $use_include_path [optional] <p>
 *                                        Forwarded to the native file_get_contents() to trigger
 *                                        include path search.
 *                                        </p>
 * @param resource|null $context          [optional] <p>
 *                                        A valid context resource created with
 *                                        stream_context_create. If null and $timeout is non-zero,
 *                                        a context with the "http" => "timeout" option is created.
 *                                        </p>
 * @param int|null      $offset           [optional] <p>
 *                                        The offset where the reading starts (null means 0).
 *                                        </p>
 * @param int|null      $maxLength        [optional] <p>
 *                                        Maximum length of data read. The default is to read until end
 *                                        of file is reached.
 *                                        </p>
 * @param int           $timeout          <p>The time in seconds for the timeout.</p>
 * @param bool          $convertToUtf8    <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
 *                                        some files, because they used non default utf-8 chars. Data that
 *                                        is detected as binary (and is neither UTF-16 nor UTF-32) is
 *                                        returned unconverted.</p>
 * @param string        $fromEncoding     [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
 *                                        A empty string will trigger the autodetect anyway.</p>
 *
 * @return false|string the function returns the read data or false on failure
 */
public static function file_get_contents(
    string $filename,
    bool $use_include_path = false,
    $context = null,
    int $offset = null,
    int $maxLength = null,
    int $timeout = 10,
    bool $convertToUtf8 = true,
    string $fromEncoding = ''
) {
    // init
    $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
    if ($filename === false) {
        // filter_var() signals failure with false; a non-string must not reach the native call.
        return false;
    }

    if ($timeout && $context === null) {
        $context = \stream_context_create(
            [
                'http' => [
                    'timeout' => $timeout,
                ],
            ]
        );
    }

    if ($offset === null) {
        $offset = 0;
    }

    // Only pass the length argument when the caller actually supplied one.
    if (\is_int($maxLength) === true) {
        $data = \file_get_contents($filename, $use_include_path, $context, $offset, $maxLength);
    } else {
        $data = \file_get_contents($filename, $use_include_path, $context, $offset);
    }

    // return false on error
    if ($data === false) {
        return false;
    }

    if ($convertToUtf8 === true) {
        $isPlainBinary = self::is_binary($data, true) === true
            && self::is_utf16($data, false) === false
            && self::is_utf32($data, false) === false;

        // Binary data that is not UTF-16 / UTF-32 is left untouched.
        if (!$isPlainBinary) {
            $data = self::encode('UTF-8', $data, false, $fromEncoding);
            $data = self::cleanup($data);
        }
    }

    return $data;
}
1385
|
|
|
|
1386
|
|
|
/**
 * Checks whether the content of a file starts with a BOM (Byte Order Mark).
 *
 * The whole file is read and handed to self::string_has_bom().
 *
 * @param string $file_path <p>Path to a valid file.</p>
 *
 * @throws \RuntimeException if file_get_contents() returned false
 *
 * @return bool
 *              <strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
 */
public static function file_has_bom(string $file_path): bool
{
    $contents = \file_get_contents($file_path);

    if ($contents !== false) {
        return self::string_has_bom($contents);
    }

    throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
}
1405
|
|
|
|
1406
|
|
|
/**
 * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
 *
 * Arrays and objects are processed recursively (element by element / property by property);
 * strings are normalized; every other type is returned unchanged.
 *
 * @param mixed  $var                <p>The value to filter.</p>
 * @param int    $normalization_form [optional] <p>One of the \Normalizer form constants.
 *                                   Default: \Normalizer::NFC. The constant is used instead of a
 *                                   literal because its integer value is not the same across
 *                                   PHP versions.</p>
 * @param string $leading_combining  [optional] <p>Prepended when the result would start with a
 *                                   combining mark (\p{Mn}).</p>
 *
 * @return mixed
 */
public static function filter($var, int $normalization_form = \Normalizer::NFC, string $leading_combining = '◌')
{
    $type = \gettype($var);

    if ($type === 'array') {
        foreach ($var as $k => $v) {
            /** @noinspection AlterInForeachInspection */
            $var[$k] = self::filter($v, $normalization_form, $leading_combining);
        }

        return $var;
    }

    if ($type === 'object') {
        foreach ($var as $k => $v) {
            $var->{$k} = self::filter($v, $normalization_form, $leading_combining);
        }

        return $var;
    }

    if ($type !== 'string') {
        return $var;
    }

    if (\strpos($var, "\r") !== false) {
        // Workaround https://bugs.php.net/65732
        $var = self::normalize_line_ending($var);
    }

    // Pure ASCII needs neither normalization nor re-encoding.
    if (self::is_ascii($var) !== false) {
        return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
        // Marker value: already normalized, so isset($n[0]) below is true.
        $n = '-';
    } else {
        /** @noinspection PhpUndefinedClassInspection */
        $n = \Normalizer::normalize($var, $normalization_form);

        if (isset($n[0])) {
            $var = $n;
        } else {
            // Normalization produced nothing usable: try to repair the encoding instead.
            $var = self::encode('UTF-8', $var, true);
        }
    }

    if (
        $var[0] >= "\x80"
        &&
        isset($n[0], $leading_combining[0])
        &&
        \preg_match('/^\p{Mn}/u', $var)
    ) {
        // Prevent leading combining chars
        // for NFC-safe concatenations.
        $var = $leading_combining . $var;
    }

    return $var;
}
1471
|
|
|
|
1472
|
|
|
/**
 * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
 *
 * Gets a specific external variable by name and optionally filters it; the result is passed
 * through self::filter().
 *
 * @see http://php.net/manual/en/function.filter-input.php
 *
 * @param int    $type          <p>
 *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
 *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
 *                              <b>INPUT_ENV</b>.
 *                              </p>
 * @param string $variable_name <p>
 *                              Name of a variable to get.
 *                              </p>
 * @param int    $filter        [optional] <p>
 *                              The ID of the filter to apply. The
 *                              manual page lists the available filters.
 *                              </p>
 * @param mixed  $options       [optional] <p>
 *                              Associative array of options or bitwise disjunction of flags. If filter
 *                              accepts options, flags can be provided in "flags" field of array.
 *                              null (the default) means "no options" and is not forwarded.
 *                              </p>
 *
 * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
 *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
 *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
 */
public static function filter_input(int $type, string $variable_name, int $filter = \FILTER_DEFAULT, $options = null)
{
    // The native $options parameter is not nullable, so null (omitted or passed
    // explicitly) must not be forwarded.
    if ($options === null) {
        $var = \filter_input($type, $variable_name, $filter);
    } else {
        $var = \filter_input($type, $variable_name, $filter, $options);
    }

    return self::filter($var);
}
1510
|
|
|
|
1511
|
|
|
/**
 * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
 *
 * Gets external variables and optionally filters them; the result is passed through self::filter().
 *
 * @see http://php.net/manual/en/function.filter-input-array.php
 *
 * @param int   $type       <p>
 *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
 *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
 *                          <b>INPUT_ENV</b>.
 *                          </p>
 * @param mixed $definition [optional] <p>
 *                          An array defining the arguments. A valid key is a string
 *                          containing a variable name and a valid value is either a filter type, or an array
 *                          optionally specifying the filter, flags and options. If the value is an
 *                          array, valid keys are filter which specifies the
 *                          filter type,
 *                          flags which specifies any flags that apply to the
 *                          filter, and options which specifies any options that
 *                          apply to the filter.
 *                          </p>
 *                          <p>
 *                          This parameter can be also an integer holding a filter constant. Then all values in the
 *                          input array are filtered by this filter.
 *                          null (the default) means "no definition" and is not forwarded.
 *                          </p>
 * @param bool  $add_empty  [optional] <p>
 *                          Add missing keys as <b>NULL</b> to the return value.
 *                          Only forwarded together with a non-null $definition.
 *                          </p>
 *
 * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
 *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
 *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
 *               is not set and <b>NULL</b> if the filter fails.
 */
public static function filter_input_array(int $type, $definition = null, bool $add_empty = true)
{
    // The native definition parameter is not nullable, so null (omitted or passed
    // explicitly) must not be forwarded.
    if ($definition === null) {
        $a = \filter_input_array($type);
    } else {
        $a = \filter_input_array($type, $definition, $add_empty);
    }

    return self::filter($a);
}
1556
|
|
|
|
1557
|
|
|
/**
 * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
 *
 * Filters a variable with a specified filter; the result is passed through self::filter().
 *
 * @see http://php.net/manual/en/function.filter-var.php
 *
 * @param mixed $variable <p>
 *                        Value to filter.
 *                        </p>
 * @param int   $filter   [optional] <p>
 *                        The ID of the filter to apply. The
 *                        manual page lists the available filters.
 *                        </p>
 * @param mixed $options  [optional] <p>
 *                        Associative array of options or bitwise disjunction of flags. If filter
 *                        accepts options, flags can be provided in "flags" field of array. For
 *                        the "callback" filter, callable type should be passed. The
 *                        callback must accept one argument, the value to be filtered, and return
 *                        the value after filtering/sanitizing it.
 *                        null (the default) means "no options" and is not forwarded.
 *                        </p>
 *                        <p>
 *                        <code>
 *                        // for filters that accept options, use this format
 *                        $options = array(
 *                        'options' => array(
 *                        'default' => 3, // value to return if the filter fails
 *                        // other options here
 *                        'min_range' => 0
 *                        ),
 *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
 *                        );
 *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
 *                        // for filter that only accept flags, you can pass them directly
 *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
 *                        // for filter that only accept flags, you can also pass as an array
 *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
 *                        array('flags' => FILTER_NULL_ON_FAILURE));
 *                        // callback validate filter
 *                        function foo($value)
 *                        {
 *                        // Expected format: Surname, GivenNames
 *                        if (strpos($value, ", ") === false) return false;
 *                        list($surname, $givennames) = explode(", ", $value, 2);
 *                        $empty = (empty($surname) || empty($givennames));
 *                        $notstrings = (!is_string($surname) || !is_string($givennames));
 *                        if ($empty || $notstrings) {
 *                        return false;
 *                        } else {
 *                        return $value;
 *                        }
 *                        }
 *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
 *                        </code>
 *                        </p>
 *
 * @return mixed the filtered data, or <b>FALSE</b> if the filter fails
 */
public static function filter_var($variable, int $filter = \FILTER_DEFAULT, $options = null)
{
    // The native $options parameter is not nullable, so null (omitted or passed
    // explicitly) must not be forwarded.
    if ($options === null) {
        $variable = \filter_var($variable, $filter);
    } else {
        $variable = \filter_var($variable, $filter, $options);
    }

    return self::filter($variable);
}
1625
|
|
|
|
1626
|
|
|
/**
 * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
 *
 * Gets multiple variables and optionally filters them; the result is passed through self::filter().
 *
 * @see http://php.net/manual/en/function.filter-var-array.php
 *
 * @param array $data       <p>
 *                          An array with string keys containing the data to filter.
 *                          </p>
 * @param mixed $definition [optional] <p>
 *                          An array defining the arguments. A valid key is a string
 *                          containing a variable name and a valid value is either a
 *                          filter type, or an
 *                          array optionally specifying the filter, flags and options.
 *                          If the value is an array, valid keys are filter
 *                          which specifies the filter type,
 *                          flags which specifies any flags that apply to the
 *                          filter, and options which specifies any options that
 *                          apply to the filter.
 *                          </p>
 *                          <p>
 *                          This parameter can be also an integer holding a filter constant. Then all values in the
 *                          input array are filtered by this filter.
 *                          null (the default) means "no definition" and is not forwarded.
 *                          </p>
 * @param bool  $add_empty  [optional] <p>
 *                          Add missing keys as <b>NULL</b> to the return value.
 *                          Only forwarded together with a non-null $definition.
 *                          </p>
 *
 * @return mixed an array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
 *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
 *               set
 */
public static function filter_var_array(array $data, $definition = null, bool $add_empty = true)
{
    // The native definition parameter is not nullable, so null (omitted or passed
    // explicitly) must not be forwarded.
    if ($definition === null) {
        $a = \filter_var_array($data);
    } else {
        $a = \filter_var_array($data, $definition, $add_empty);
    }

    return self::filter($a);
}
1669
|
|
|
|
1670
|
|
|
/**
 * Reports whether the "finfo" class exists.
 *
 * @return bool
 *              <strong>true</strong> if available, <strong>false</strong> otherwise
 */
public static function finfo_loaded(): bool
{
    $isAvailable = \class_exists('finfo');

    return $isAvailable;
}
1680
|
|
|
|
1681
|
|
|
/**
 * Returns the first $n characters of the string.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $n        <p>Number of characters to retrieve from the start;
 *                         values <= 0 yield an empty string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string the leading characters, or an empty string if self::substr() returns false
 */
public static function first_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
{
    if ($n > 0) {
        $prefix = self::substr($str, 0, $n, $encoding);
        if ($prefix !== false) {
            return $prefix;
        }
    }

    return '';
}
1703
|
|
|
|
1704
|
|
|
/**
 * Checks that the string is not longer than the given number of characters.
 *
 * The length is measured with self::strlen().
 *
 * @param string $str      the string to be measured
 * @param int    $box_size the maximum number of characters allowed
 *
 * @return bool true if the measured length is less than or equal to $box_size, false otherwise
 */
public static function fits_inside(string $str, int $box_size): bool
{
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
}
1716
|
|
|
|
1717
|
|
|
/**
 * Applies the case-folding replacement tables to a string.
 *
 * The common table (self::$COMMON_CASE_FOLD) is always applied; the full
 * table from the "caseFolding_full" data file is applied on top when requested.
 *
 * @param string $str
 * @param bool   $useLower     <p>When exactly <strong>true</strong> the "upper" entries are replaced
 *                             by the "lower" entries, otherwise the "lower" entries are replaced
 *                             by the "upper" entries (default).</p>
 * @param bool   $fullCaseFold <p>Convert not only common cases.</p>
 *
 * @return string
 */
private static function fixStrCaseHelper(string $str, $useLower = false, $fullCaseFold = false): string
{
    // lazily loaded once per request, see below
    static $FULL_CASE_FOLD = null;

    // only a strict boolean true selects the lower-case direction
    $toLower = $useLower === true;

    $commonUpper = self::$COMMON_CASE_FOLD['upper'];
    $commonLower = self::$COMMON_CASE_FOLD['lower'];

    $str = $toLower
        ? (string) \str_replace($commonUpper, $commonLower, $str)
        : (string) \str_replace($commonLower, $commonUpper, $str);

    if (!$fullCaseFold) {
        return $str;
    }

    if ($FULL_CASE_FOLD === null) {
        $FULL_CASE_FOLD = self::getData('caseFolding_full');
    }

    // index 0 is the search side for the lower-case direction, index 1 for the other one
    $search = $toLower ? $FULL_CASE_FOLD[0] : $FULL_CASE_FOLD[1];
    $replace = $toLower ? $FULL_CASE_FOLD[1] : $FULL_CASE_FOLD[0];

    return (string) \str_replace($search, $replace, $str);
}
1758
|
|
|
|
1759
|
|
|
/**
 * Try to fix simple broken UTF-8 strings.
 *
 * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
 *
 * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
 * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
 * See: http://en.wikipedia.org/wiki/Windows-1252
 *
 * The fix is a plain search/replace driven by the "utf8_fix" data file.
 *
 * @param string $str <p>The input string</p>
 *
 * @return string
 */
public static function fix_simple_utf8(string $str): string
{
    // search / replace lists derived from the fix-map, built on first use
    static $searchCache = null;
    static $replaceCache = null;

    if ($str === '') {
        return '';
    }

    if ($searchCache === null) {
        if (self::$BROKEN_UTF8_FIX === null) {
            self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
        }

        $searchCache = \array_keys(self::$BROKEN_UTF8_FIX);
        $replaceCache = \array_values(self::$BROKEN_UTF8_FIX);
    }

    return \str_replace($searchCache, $replaceCache, $str);
}
1792
|
|
|
|
1793
|
|
|
/**
 * Fix a double (or multiple) encoded UTF8 string.
 *
 * Arrays are processed element by element (recursively, keys are kept);
 * everything else is cast to string and decoded / re-encoded until the
 * result no longer changes.
 *
 * @param string|string[] $str you can use a string or an array of strings
 *
 * @return string|string[]
 *                         Will return the fixed input-"array" or
 *                         the fixed input-"string"
 */
public static function fix_utf8($str)
{
    if (\is_array($str) === true) {
        // array_map() with a single input array preserves the original keys
        return \array_map(
            static function ($item) {
                return self::fix_utf8($item);
            },
            $str
        );
    }

    $current = (string) $str;
    $previous = '';

    // repeat until one more round-trip leaves the string untouched
    while ($previous !== $current) {
        $previous = $current;
        $current = self::to_utf8(self::utf8_decode($current, true));
    }

    return $current;
}
1823
|
|
|
|
1824
|
|
|
/**
 * Get the text direction of a specific character.
 *
 * If "IntlChar" is flagged as available its direction code is consulted first;
 * when that gives no LTR / RTL answer (or the class is not available) the code
 * point is looked up in a built-in list of right-to-left code points.
 *
 * @param string $char
 *
 * @return string 'RTL' or 'LTR'
 */
public static function getCharDirection(string $char): string
{
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $intlDirection = \IntlChar::charDirection($char);

        // direction codes from the "IntlChar"-Class
        if (\in_array($intlDirection, [0, 11, 12, 20], true)) {
            return 'LTR';
        }

        if (\in_array($intlDirection, [1, 13, 14, 15, 21], true)) {
            return 'RTL';
        }

        // any other direction code falls through to the code point table
    }

    $c = static::chr_to_decimal($char);

    // every entry of the table below lies inside this window
    if (!($c >= 0x5be && $c <= 0x10b7f)) {
        return 'LTR';
    }

    // plain integers are single code points, pairs are inclusive [from, to] ranges
    $rtlCodePoints = [
        0x5be,
        0x5c0,
        0x5c3,
        0x5c6,
        [0x5d0, 0x5ea],
        [0x5f0, 0x5f4],
        0x608,
        0x60b,
        0x60d,
        0x61b,
        [0x61e, 0x64a],
        [0x66d, 0x66f],
        [0x671, 0x6d5],
        [0x6e5, 0x6e6],
        [0x6ee, 0x6ef],
        [0x6fa, 0x70d],
        0x710,
        [0x712, 0x72f],
        [0x74d, 0x7a5],
        0x7b1,
        [0x7c0, 0x7ea],
        [0x7f4, 0x7f5],
        0x7fa,
        [0x800, 0x815],
        0x81a,
        0x824,
        0x828,
        [0x830, 0x83e],
        [0x840, 0x858],
        0x85e,
        0x200f,
        0xfb1d,
        [0xfb1f, 0xfb28],
        [0xfb2a, 0xfb36],
        [0xfb38, 0xfb3c],
        0xfb3e,
        [0xfb40, 0xfb41],
        [0xfb43, 0xfb44],
        [0xfb46, 0xfbc1],
        [0xfbd3, 0xfd3d],
        [0xfd50, 0xfd8f],
        [0xfd92, 0xfdc7],
        [0xfdf0, 0xfdfc],
        [0xfe70, 0xfe74],
        [0xfe76, 0xfefc],
        [0x10800, 0x10805],
        0x10808,
        [0x1080a, 0x10835],
        [0x10837, 0x10838],
        0x1083c,
        [0x1083f, 0x10855],
        [0x10857, 0x1085f],
        [0x10900, 0x1091b],
        [0x10920, 0x10939],
        0x1093f,
        0x10a00,
        [0x10a10, 0x10a13],
        [0x10a15, 0x10a17],
        [0x10a19, 0x10a33],
        [0x10a40, 0x10a47],
        [0x10a50, 0x10a58],
        [0x10a60, 0x10a7f],
        [0x10b00, 0x10b35],
        [0x10b40, 0x10b55],
        [0x10b58, 0x10b72],
        [0x10b78, 0x10b7f],
    ];

    foreach ($rtlCodePoints as $entry) {
        if (\is_int($entry)) {
            // single code points are compared strictly
            if ($c === $entry) {
                return 'RTL';
            }

            continue;
        }

        if ($c >= $entry[0] && $c <= $entry[1]) {
            return 'RTL';
        }
    }

    return 'LTR';
}
1941
|
|
|
|
1942
|
|
|
/**
 * Includes "/data/<file>.php" (relative to this file) and returns whatever that file returns.
 *
 * @param string $file <p>File name without directory and without the ".php" extension.</p>
 *
 * @return mixed
 */
private static function getData(string $file)
{
    $path = __DIR__ . '/data/' . $file . '.php';

    /** @noinspection PhpIncludeInspection */
    return include $path;
}
1954
|
|
|
|
1955
|
|
|
/**
 * Includes "/data/<file>.php" (relative to this file) if that file exists.
 *
 * @param string $file <p>File name without directory and without the ".php" extension.</p>
 *
 * @return false|mixed the value returned by the included file, or false if the file does not exist
 */
private static function getDataIfExists(string $file)
{
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!\file_exists($path)) {
        return false;
    }

    /** @noinspection PhpIncludeInspection */
    return include $path;
}
1972
|
|
|
|
1973
|
|
|
/**
 * Check for php-support.
 *
 * Runs the support detection first if it has not been done yet.
 *
 * @param string|null $key
 *
 * @return mixed
 *               Return the full support-"array", if $key === null<br>
 *               return the stored value, if $key is used and set<br>
 *               otherwise return <strong>null</strong>
 */
public static function getSupportInfo(string $key = null)
{
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if ($key !== null) {
        // unknown (or null-valued) keys yield null
        return self::$SUPPORT[$key] ?? null;
    }

    return self::$SUPPORT;
}
1999
|
|
|
|
2000
|
|
|
/**
 * Guesses the file type from the first two bytes of the given content.
 *
 * Warning: this method only works for some file-types (png bmp gif jpg rar zip midi exe pdf)
 *          if you need more supported types, please use e.g. "finfo"
 *
 * @param string $str      <p>The (binary) content to inspect.</p>
 * @param array  $fallback <p>Returned as-is when nothing matches; with this keys: 'ext', 'mime', 'type'</p>
 *
 * @return array
 *               with this keys: 'ext', 'mime', 'type'
 */
public static function get_file_type(
    string $str,
    array $fallback = [
        'ext'  => null,
        'mime' => 'application/octet-stream',
        'type' => null,
    ]
): array {
    if ($str === '') {
        return $fallback;
    }

    $leadingBytes = self::substr_in_byte($str, 0, 2);
    if (self::strlen_in_byte($leadingBytes) !== 2) {
        return $fallback;
    }

    // The two byte values are glued together as decimal text and read back
    // as one integer, e.g. the bytes 37 and 80 become 3780.
    $byteValues = \unpack('C2chars', $leadingBytes);
    $type_code = (int) ($byteValues['chars1'] . $byteValues['chars2']);

    // type code => [extension, mime type]
    $knownSignatures = [
        3780   => ['pdf', 'application/pdf'],
        7790   => ['exe', 'application/octet-stream'],
        7784   => ['midi', 'audio/x-midi'],
        8075   => ['zip', 'application/zip'],
        8297   => ['rar', 'application/rar'],
        255216 => ['jpg', 'image/jpeg'],
        7173   => ['gif', 'image/gif'],
        6677   => ['bmp', 'image/bmp'],
        13780  => ['png', 'image/png'],
    ];

    if (!isset($knownSignatures[$type_code])) {
        return $fallback;
    }

    return [
        'ext'  => $knownSignatures[$type_code][0],
        'mime' => $knownSignatures[$type_code][1],
        'type' => 'binary',
    ];
}
2098
|
|
|
|
2099
|
|
|
/**
 * Builds a string from randomly picked characters of $possibleChars.
 *
 * @param int    $length        <p>Length of the random string.</p>
 * @param string $possibleChars [optional] <p>Characters string for the random selection.</p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 */
public static function get_random_string(int $length, string $possibleChars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789', string $encoding = 'UTF-8'): string
{
    $maxlength = self::strlen($possibleChars, $encoding);

    // nothing to pick from
    if ($maxlength === 0) {
        return '';
    }

    $result = '';
    for ($picked = 0; $picked < $length; ++$picked) {
        try {
            $offset = \random_int(0, $maxlength - 1);
        } catch (\Exception $e) {
            // random_int() failed with an exception; fall back to mt_rand()
            /** @noinspection RandomApiMigrationInspection */
            $offset = \mt_rand(0, $maxlength - 1);
        }

        $result .= self::substr($possibleChars, $offset, 1, $encoding);
    }

    return $result;
}
2132
|
|
|
|
2133
|
|
|
/**
 * Creates a unique identifier string.
 *
 * The identifier comes from uniqid(), prefixed with a helper built from
 * mt_rand(), the session id, the REMOTE_ADDR / SERVER_ADDR server values
 * (when set) and the caller's extra entropy.
 *
 * @param int|string $entropyExtra [optional] <p>Extra entropy via a string or int value.</p>
 * @param bool       $md5          [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
 *
 * @return string
 */
public static function get_unique_string($entropyExtra = '', bool $md5 = true): string
{
    $entropyParts = [
        \mt_rand(),
        \session_id(),
        $_SERVER['REMOTE_ADDR'] ?? '',
        $_SERVER['SERVER_ADDR'] ?? '',
        $entropyExtra,
    ];
    $uniqueHelper = \implode('', $entropyParts);

    $uniqueString = \uniqid($uniqueHelper, true);

    return $md5 ? \md5($uniqueString . $uniqueHelper) : $uniqueString;
}
2155
|
|
|
|
2156
|
|
|
/**
 * alias for "UTF8::string_has_bom()"
 *
 * @see UTF8::string_has_bom()
 *
 * @param string $str
 *
 * @return bool
 *
 * @deprecated <p>use "UTF8::string_has_bom()"</p>
 */
public static function hasBom(string $str): bool
{
    $bomFound = self::string_has_bom($str);

    return $bomFound;
}
2171
|
|
|
|
2172
|
|
|
/**
 * Tells whether the string contains at least one lower case character.
 *
 * The check is delegated to self::str_matches_pattern().
 *
 * @param string $str <p>The input string.</p>
 *
 * @return bool whether or not the string contains a lower case character
 */
public static function has_lowercase(string $str): bool
{
    $lowerCasePattern = '.*[[:lower:]]';

    return self::str_matches_pattern($str, $lowerCasePattern);
}
2183
|
|
|
|
2184
|
|
|
/**
 * Tells whether the string contains at least one upper case character.
 *
 * The check is delegated to self::str_matches_pattern().
 *
 * @param string $str <p>The input string.</p>
 *
 * @return bool whether or not the string contains an upper case character
 */
public static function has_uppercase(string $str): bool
{
    $upperCasePattern = '.*[[:upper:]]';

    return self::str_matches_pattern($str, $upperCasePattern);
}
2195
|
|
|
|
2196
|
|
|
/**
 * Converts a hexadecimal-value into an UTF-8 character.
 *
 * The value is turned into a number with hexdec() and handed to
 * self::decimal_to_chr().
 *
 * @param string $hexdec <p>The hexadecimal value.</p>
 *
 * @return false|string one single UTF-8 character
 */
public static function hex_to_chr(string $hexdec)
{
    $codePoint = \hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
}
2207
|
|
|
|
2208
|
|
|
/**
 * Converts hexadecimal U+xxxx code point representation to integer.
 *
 * Accepted forms are "U+xxxx", "\uxxxx" and plain "xxxx", each with
 * 4 to 6 hexadecimal digits (case-insensitive).
 *
 * INFO: opposite to UTF8::int_to_hex()
 *
 * @param string $hexDec <p>The hexadecimal code point representation.</p>
 *
 * @return false|int the code point, or false on failure
 */
public static function hex_to_int($hexDec)
{
    // init
    $hexDec = (string) $hexDec;

    if ($hexDec === '') {
        return false;
    }

    // Only genuine hex digits are allowed in the capture group: with a wider
    // class (e.g. a-z) intval(..., 16) would silently stop at the first
    // non-hex letter and report a bogus code point instead of a failure.
    if (\preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
        return \intval($match[1], 16);
    }

    return false;
}
2232
|
|
|
|
2233
|
|
|
/**
 * alias for "UTF8::html_entity_decode()"
 *
 * @see UTF8::html_entity_decode()
 *
 * @param string $str
 * @param int    $flags
 * @param string $encoding
 *
 * @return string
 */
public static function html_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
{
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
}
2248
|
|
|
|
2249
|
|
|
/**
 * Converts a UTF-8 string to a series of HTML numbered entities.
 *
 * INFO: opposite to UTF8::html_decode()
 *
 * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
 * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
 * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string HTML numbered entities
 */
public static function html_encode(string $str, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (self::$SUPPORT['mbstring'] === true) {
        // with $keepAsciiChars the conversion starts above the ASCII range
        $startCode = $keepAsciiChars === true ? 0x80 : 0x00;

        // A conversion map consists of groups of four integers
        // (start, end, offset, mask); PHP 8 rejects any other size.
        return \mb_encode_numericentity(
            $str,
            [$startCode, 0xfffff, 0, 0xfffff],
            $encoding
        );
    }

    //
    // fallback via vanilla php
    //

    return \implode(
        '',
        \array_map(
            static function ($chr) use ($keepAsciiChars, $encoding) {
                return self::single_chr_html_encode($chr, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
}
2302
|
|
|
|
2303
|
|
|
/**
 * UTF-8 version of html_entity_decode()
 *
 * The reason we are not using html_entity_decode() by itself is because
 * while it is not technically correct to leave out the semicolon
 * at the end of an entity most browsers will still interpret the entity
 * correctly. html_entity_decode() does not convert entities without
 * semicolons, so we are left with our own little solution here. Bummer.
 *
 * Convert all HTML entities to their applicable characters. Decoding is
 * repeated until the string no longer changes, so nested (multiply
 * encoded) entities are resolved too.
 *
 * INFO: opposite to UTF8::html_encode()
 *
 * @see http://php.net/manual/en/function.html-entity-decode.php
 *
 * @param string $str      <p>
 *                         The input string.
 *                         </p>
 * @param int    $flags    [optional] <p>
 *                         A bitmask of one or more of the following flags, which specify how to handle
 *                         quotes and which document type to use. If omitted (null),
 *                         ENT_QUOTES | ENT_HTML5 is used.
 *                         <ul>
 *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
 *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
 *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
 *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
 *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
 *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
 *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
 *                         </ul>
 *                         </p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string the decoded string
 */
public static function html_entity_decode(string $str, int $flags = null, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    // fewer than four bytes are returned untouched, examples: &; || &x;
    if (!isset($str[3])) {
        return $str;
    }

    if (
        \strpos($str, '&') === false
        ||
        (
            \strpos($str, '&#') === false
            &&
            \strpos($str, ';') === false
        )
    ) {
        return $str;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
        $flags = \ENT_QUOTES | \ENT_HTML5;
    }

    // The support flags have to be initialised before they are read for the
    // first time a few lines below.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    do {
        $str_compare = $str;

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // A conversion map consists of groups of four integers
            // (start, end, offset, mask); PHP 8 rejects any other size.
            $str = \mb_decode_numericentity(
                $str,
                [0x80, 0xfffff, 0, 0xfffff],
                $encoding
            );
        } else {
            $str = (string) \preg_replace_callback(
                "/&#\d{2,6};/",
                static function ($matches) use ($encoding) {
                    // always fallback via symfony polyfill
                    $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

                    // quotes are left encoded here; the $flags decide about them below
                    if ($returnTmp !== '"' && $returnTmp !== "'") {
                        return $returnTmp;
                    }

                    return $matches[0];
                },
                $str
            );
        }

        // decode numeric & UTF16 two byte entities
        // (numeric entities without a trailing semicolon get one appended first)
        $withSemicolons = \preg_replace(
            '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
            '$1;',
            $str
        );

        // preg_replace() returns null on a PCRE error; keep the current string in that case
        $str = \html_entity_decode(
            $withSemicolons ?? $str,
            $flags,
            $encoding
        );
    } while ($str_compare !== $str);

    return $str;
}
2455
|
|
|
|
2456
|
|
|
/**
 * Create a escape html version of the string via "UTF8::htmlspecialchars()".
 *
 * The call always uses the flags ENT_QUOTES | ENT_SUBSTITUTE.
 *
 * @param string $str
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return string
 */
public static function html_escape(string $str, string $encoding = 'UTF-8'): string
{
    $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

    return self::htmlspecialchars($str, $flags, $encoding);
}
2472
|
|
|
|
2473
|
|
|
/**
 * Remove empty html-tag.
 *
 * A tag pair counts as empty when nothing, or only whitespace, sits
 * between the opening and the closing tag.
 *
 * e.g.: <tag></tag>
 *
 * @param string $str
 *
 * @return string
 */
public static function html_stripe_empty_tags(string $str): string
{
    $emptyTagPattern = "/<[^\/>]*>(([\s]?)*|)<\/[^>]*>/iu";

    $stripped = \preg_replace($emptyTagPattern, '', $str);

    return (string) $stripped;
}
2490
|
|
|
|
2491
|
|
|
/** |
2492
|
|
|
* Convert all applicable characters to HTML entities: UTF-8 version of htmlentities() |
2493
|
|
|
* |
2494
|
|
|
* @see http://php.net/manual/en/function.htmlentities.php |
2495
|
|
|
* |
2496
|
|
|
* @param string $str <p> |
2497
|
|
|
* The input string. |
2498
|
|
|
* </p> |
2499
|
|
|
* @param int $flags [optional] <p> |
2500
|
|
|
* A bitmask of one or more of the following flags, which specify how to handle quotes, |
2501
|
|
|
* invalid code unit sequences and the used document type. The default is |
2502
|
|
|
* ENT_COMPAT | ENT_HTML401. |
2503
|
|
|
* <table> |
2504
|
|
|
* Available <i>flags</i> constants |
2505
|
|
|
* <tr valign="top"> |
2506
|
|
|
* <td>Constant Name</td> |
2507
|
|
|
* <td>Description</td> |
2508
|
|
|
* </tr> |
2509
|
|
|
* <tr valign="top"> |
2510
|
|
|
* <td><b>ENT_COMPAT</b></td> |
2511
|
|
|
* <td>Will convert double-quotes and leave single-quotes alone.</td> |
2512
|
|
|
* </tr> |
2513
|
|
|
* <tr valign="top"> |
2514
|
|
|
* <td><b>ENT_QUOTES</b></td> |
2515
|
|
|
* <td>Will convert both double and single quotes.</td> |
2516
|
|
|
* </tr> |
2517
|
|
|
* <tr valign="top"> |
2518
|
|
|
* <td><b>ENT_NOQUOTES</b></td> |
2519
|
|
|
* <td>Will leave both double and single quotes unconverted.</td> |
2520
|
|
|
* </tr> |
2521
|
|
|
* <tr valign="top"> |
2522
|
|
|
* <td><b>ENT_IGNORE</b></td> |
2523
|
|
|
* <td> |
2524
|
|
|
* Silently discard invalid code unit sequences instead of returning |
2525
|
|
|
* an empty string. Using this flag is discouraged as it |
2526
|
|
|
* may have security implications. |
2527
|
|
|
* </td> |
2528
|
|
|
* </tr> |
2529
|
|
|
* <tr valign="top"> |
2530
|
|
|
* <td><b>ENT_SUBSTITUTE</b></td> |
2531
|
|
|
* <td> |
2532
|
|
|
* Replace invalid code unit sequences with a Unicode Replacement Character |
2533
|
|
|
* U+FFFD (UTF-8) or &#38;#FFFD; (otherwise) instead of returning an empty string. |
2534
|
|
|
* </td> |
2535
|
|
|
* </tr> |
2536
|
|
|
* <tr valign="top"> |
2537
|
|
|
* <td><b>ENT_DISALLOWED</b></td> |
2538
|
|
|
* <td> |
2539
|
|
|
* Replace invalid code points for the given document type with a |
2540
|
|
|
* Unicode Replacement Character U+FFFD (UTF-8) or &#38;#FFFD; |
2541
|
|
|
* (otherwise) instead of leaving them as is. This may be useful, for |
2542
|
|
|
* instance, to ensure the well-formedness of XML documents with |
2543
|
|
|
* embedded external content. |
2544
|
|
|
* </td> |
2545
|
|
|
* </tr> |
2546
|
|
|
* <tr valign="top"> |
2547
|
|
|
* <td><b>ENT_HTML401</b></td> |
2548
|
|
|
* <td> |
2549
|
|
|
* Handle code as HTML 4.01. |
2550
|
|
|
* </td> |
2551
|
|
|
* </tr> |
2552
|
|
|
* <tr valign="top"> |
2553
|
|
|
* <td><b>ENT_XML1</b></td> |
2554
|
|
|
* <td> |
2555
|
|
|
* Handle code as XML 1. |
2556
|
|
|
* </td> |
2557
|
|
|
* </tr> |
2558
|
|
|
* <tr valign="top"> |
2559
|
|
|
* <td><b>ENT_XHTML</b></td> |
2560
|
|
|
* <td> |
2561
|
|
|
* Handle code as XHTML. |
2562
|
|
|
* </td> |
2563
|
|
|
* </tr> |
2564
|
|
|
* <tr valign="top"> |
2565
|
|
|
* <td><b>ENT_HTML5</b></td> |
2566
|
|
|
* <td> |
2567
|
|
|
* Handle code as HTML 5. |
2568
|
|
|
* </td> |
2569
|
|
|
* </tr> |
2570
|
|
|
* </table> |
2571
|
|
|
* </p> |
2572
|
|
|
* @param string $encoding [optional] <p> |
2573
|
|
|
* Like <b>htmlspecialchars</b>, |
2574
|
|
|
* <b>htmlentities</b> takes an optional third argument |
2575
|
|
|
* <i>encoding</i> which defines encoding used in |
2576
|
|
|
* conversion. |
2577
|
|
|
* Although this argument is technically optional, you are highly |
2578
|
|
|
* encouraged to specify the correct value for your code. |
2579
|
|
|
* </p> |
2580
|
|
|
* @param bool $double_encode [optional] <p> |
2581
|
|
|
* When <i>double_encode</i> is turned off PHP will not |
2582
|
|
|
* encode existing html entities. The default is to convert everything. |
2583
|
|
|
* </p> |
2584
|
|
|
* |
2585
|
|
|
* @return string the encoded string. |
2586
|
|
|
* </p> |
2587
|
|
|
* <p> |
2588
|
|
|
* If the input <i>string</i> contains an invalid code unit |
2589
|
|
|
* sequence within the given <i>encoding</i> an empty string |
2590
|
|
|
* will be returned, unless either the <b>ENT_IGNORE</b> or |
2591
|
|
|
* <b>ENT_SUBSTITUTE</b> flags are set |
2592
|
|
|
*/ |
2593
|
9 |
|
public static function htmlentities(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
{
    // "UTF-8" and "CP850" are handed to PHP as-is; every other label goes
    // through normalize_encoding() first ('UTF-8' is passed as its second
    // argument - presumably the fallback, confirm against that helper).
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = \htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * The native htmlentities() does not turn a backslash into an HTML
     * entity, so each backslash is replaced by its numeric entity here.
     * NOTE: '\\' is the PHP literal for ONE backslash character.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $str = \str_replace('\\', '&#92;', $str);

    // Final pass through the class' own entity encoder.
    return self::html_encode($str, true, $encoding);
}
2613
|
|
|
|
2614
|
|
|
/** |
2615
|
|
|
* Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars() |
2616
|
|
|
* |
2617
|
|
|
* INFO: Take a look at "UTF8::htmlentities()" |
2618
|
|
|
* |
2619
|
|
|
* @see http://php.net/manual/en/function.htmlspecialchars.php |
2620
|
|
|
* |
2621
|
|
|
* @param string $str <p> |
2622
|
|
|
* The string being converted. |
2623
|
|
|
* </p> |
2624
|
|
|
* @param int $flags [optional] <p> |
2625
|
|
|
* A bitmask of one or more of the following flags, which specify how to handle quotes, |
2626
|
|
|
* invalid code unit sequences and the used document type. The default is |
2627
|
|
|
* ENT_COMPAT | ENT_HTML401. |
2628
|
|
|
* <table> |
2629
|
|
|
* Available <i>flags</i> constants |
2630
|
|
|
* <tr valign="top"> |
2631
|
|
|
* <td>Constant Name</td> |
2632
|
|
|
* <td>Description</td> |
2633
|
|
|
* </tr> |
2634
|
|
|
* <tr valign="top"> |
2635
|
|
|
* <td><b>ENT_COMPAT</b></td> |
2636
|
|
|
* <td>Will convert double-quotes and leave single-quotes alone.</td> |
2637
|
|
|
* </tr> |
2638
|
|
|
* <tr valign="top"> |
2639
|
|
|
* <td><b>ENT_QUOTES</b></td> |
2640
|
|
|
* <td>Will convert both double and single quotes.</td> |
2641
|
|
|
* </tr> |
2642
|
|
|
* <tr valign="top"> |
2643
|
|
|
* <td><b>ENT_NOQUOTES</b></td> |
2644
|
|
|
* <td>Will leave both double and single quotes unconverted.</td> |
2645
|
|
|
* </tr> |
2646
|
|
|
* <tr valign="top"> |
2647
|
|
|
* <td><b>ENT_IGNORE</b></td> |
2648
|
|
|
* <td> |
2649
|
|
|
* Silently discard invalid code unit sequences instead of returning |
2650
|
|
|
* an empty string. Using this flag is discouraged as it |
2651
|
|
|
* may have security implications. |
2652
|
|
|
* </td> |
2653
|
|
|
* </tr> |
2654
|
|
|
* <tr valign="top"> |
2655
|
|
|
* <td><b>ENT_SUBSTITUTE</b></td> |
2656
|
|
|
* <td> |
2657
|
|
|
* Replace invalid code unit sequences with a Unicode Replacement Character |
2658
|
|
|
* U+FFFD (UTF-8) or &#38;#FFFD; (otherwise) instead of returning an empty string. |
2659
|
|
|
* </td> |
2660
|
|
|
* </tr> |
2661
|
|
|
* <tr valign="top"> |
2662
|
|
|
* <td><b>ENT_DISALLOWED</b></td> |
2663
|
|
|
* <td> |
2664
|
|
|
* Replace invalid code points for the given document type with a |
2665
|
|
|
* Unicode Replacement Character U+FFFD (UTF-8) or &#38;#FFFD; |
2666
|
|
|
* (otherwise) instead of leaving them as is. This may be useful, for |
2667
|
|
|
* instance, to ensure the well-formedness of XML documents with |
2668
|
|
|
* embedded external content. |
2669
|
|
|
* </td> |
2670
|
|
|
* </tr> |
2671
|
|
|
* <tr valign="top"> |
2672
|
|
|
* <td><b>ENT_HTML401</b></td> |
2673
|
|
|
* <td> |
2674
|
|
|
* Handle code as HTML 4.01. |
2675
|
|
|
* </td> |
2676
|
|
|
* </tr> |
2677
|
|
|
* <tr valign="top"> |
2678
|
|
|
* <td><b>ENT_XML1</b></td> |
2679
|
|
|
* <td> |
2680
|
|
|
* Handle code as XML 1. |
2681
|
|
|
* </td> |
2682
|
|
|
* </tr> |
2683
|
|
|
* <tr valign="top"> |
2684
|
|
|
* <td><b>ENT_XHTML</b></td> |
2685
|
|
|
* <td> |
2686
|
|
|
* Handle code as XHTML. |
2687
|
|
|
* </td> |
2688
|
|
|
* </tr> |
2689
|
|
|
* <tr valign="top"> |
2690
|
|
|
* <td><b>ENT_HTML5</b></td> |
2691
|
|
|
* <td> |
2692
|
|
|
* Handle code as HTML 5. |
2693
|
|
|
* </td> |
2694
|
|
|
* </tr> |
2695
|
|
|
* </table> |
2696
|
|
|
* </p> |
2697
|
|
|
* @param string $encoding [optional] <p> |
2698
|
|
|
* Defines encoding used in conversion. |
2699
|
|
|
* </p> |
2700
|
|
|
* <p> |
2701
|
|
|
* For the purposes of this function, the encodings |
2702
|
|
|
* ISO-8859-1, ISO-8859-15, |
2703
|
|
|
* UTF-8, cp866, |
2704
|
|
|
* cp1251, cp1252, and |
2705
|
|
|
* KOI8-R are effectively equivalent, provided the |
2706
|
|
|
* <i>string</i> itself is valid for the encoding, as |
2707
|
|
|
* the characters affected by <b>htmlspecialchars</b> occupy |
2708
|
|
|
* the same positions in all of these encodings. |
2709
|
|
|
* </p> |
2710
|
|
|
* @param bool $double_encode [optional] <p> |
2711
|
|
|
* When <i>double_encode</i> is turned off PHP will not |
2712
|
|
|
* encode existing html entities, the default is to convert everything. |
2713
|
|
|
* </p> |
2714
|
|
|
* |
2715
|
|
|
* @return string the converted string. |
2716
|
|
|
* </p> |
2717
|
|
|
* <p> |
2718
|
|
|
* If the input <i>string</i> contains an invalid code unit |
2719
|
|
|
* sequence within the given <i>encoding</i> an empty string |
2720
|
|
|
* will be returned, unless either the <b>ENT_IGNORE</b> or |
2721
|
|
|
* <b>ENT_SUBSTITUTE</b> flags are set |
2722
|
|
|
*/ |
2723
|
8 |
|
public static function htmlspecialchars(string $str, int $flags = \ENT_COMPAT, string $encoding = 'UTF-8', bool $double_encode = true): string
{
    // The two labels below are forwarded untouched; anything else is
    // normalized before it reaches the native function.
    $isPassThroughLabel = $encoding === 'UTF-8' || $encoding === 'CP850';

    if (!$isPassThroughLabel) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $converted = \htmlspecialchars($str, $flags, $encoding, $double_encode);

    return $converted;
}
2731
|
|
|
|
2732
|
|
|
/** |
2733
|
|
|
* Checks whether iconv is available on the server. |
2734
|
|
|
* |
2735
|
|
|
* @return bool |
2736
|
|
|
* <strong>true</strong> if available, <strong>false</strong> otherwise |
2737
|
|
|
*/ |
2738
|
|
|
public static function iconv_loaded(): bool
{
    // extension_loaded() already yields a bool, so it is returned directly.
    return \extension_loaded('iconv');
}
2742
|
|
|
|
2743
|
|
|
/** |
2744
|
|
|
* alias for "UTF8::decimal_to_chr()" |
2745
|
|
|
* |
2746
|
|
|
* @see UTF8::decimal_to_chr() |
2747
|
|
|
* |
2748
|
|
|
* @param mixed $int |
2749
|
|
|
* |
2750
|
|
|
* @return string |
2751
|
|
|
*/ |
2752
|
4 |
|
public static function int_to_chr($int): string
{
    // Pure alias: the value is forwarded unchanged to decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
}
2756
|
|
|
|
2757
|
|
|
/** |
2758
|
|
|
* Converts Integer to hexadecimal U+xxxx code point representation. |
2759
|
|
|
* |
2760
|
|
|
* INFO: opposite to UTF8::hex_to_int() |
2761
|
|
|
* |
2762
|
|
|
* @param int $int <p>The integer to be converted to hexadecimal code point.</p> |
2763
|
|
|
* @param string $pfix [optional] |
2764
|
|
|
* |
2765
|
|
|
* @return string the prefixed hexadecimal code point, zero-padded to at least four digits |
2766
|
|
|
*/ |
2767
|
6 |
|
public static function int_to_hex(int $int, string $pfix = 'U+'): string
{
    // Left-pad the hexadecimal digits with zeros up to four characters;
    // longer values are kept as they are.
    $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

    return $pfix . $digits;
}
2775
|
|
|
|
2776
|
|
|
/** |
2777
|
|
|
* Checks whether intl-char is available on the server. |
2778
|
|
|
* |
2779
|
|
|
* @return bool |
2780
|
|
|
* <strong>true</strong> if available, <strong>false</strong> otherwise |
2781
|
|
|
*/ |
2782
|
|
|
public static function intlChar_loaded(): bool
{
    // Availability is decided solely by the presence of the IntlChar class.
    $hasIntlChar = \class_exists('IntlChar');

    return $hasIntlChar;
}
2786
|
|
|
|
2787
|
|
|
/** |
2788
|
|
|
* Checks whether intl is available on the server. |
2789
|
|
|
* |
2790
|
|
|
* @return bool |
2791
|
|
|
* <strong>true</strong> if available, <strong>false</strong> otherwise |
2792
|
|
|
*/ |
2793
|
5 |
|
public static function intl_loaded(): bool
{
    // Availability is decided solely by the "intl" extension being loaded.
    $hasIntl = \extension_loaded('intl');

    return $hasIntl;
}
2797
|
|
|
|
2798
|
|
|
/** |
2799
|
|
|
* alias for "UTF8::is_ascii()" |
2800
|
|
|
* |
2801
|
|
|
* @see UTF8::is_ascii() |
2802
|
|
|
* |
2803
|
|
|
* @param string $str |
2804
|
|
|
* |
2805
|
|
|
* @return bool |
2806
|
|
|
* |
2807
|
|
|
* @deprecated <p>use "UTF8::is_ascii()"</p> |
2808
|
|
|
*/ |
2809
|
2 |
|
public static function isAscii(string $str): bool
{
    // Deprecated camelCase alias; all work happens in is_ascii().
    $result = self::is_ascii($str);

    return $result;
}
2813
|
|
|
|
2814
|
|
|
/** |
2815
|
|
|
* alias for "UTF8::is_base64()" |
2816
|
|
|
* |
2817
|
|
|
* @see UTF8::is_base64() |
2818
|
|
|
* |
2819
|
|
|
* @param string $str |
2820
|
|
|
* |
2821
|
|
|
* @return bool |
2822
|
|
|
* |
2823
|
|
|
* @deprecated <p>use "UTF8::is_base64()"</p> |
2824
|
|
|
*/ |
2825
|
2 |
|
public static function isBase64($str): bool
{
    // Deprecated camelCase alias; all work happens in is_base64().
    $result = self::is_base64($str);

    return $result;
}
2829
|
|
|
|
2830
|
|
|
/** |
2831
|
|
|
* alias for "UTF8::is_binary()" |
2832
|
|
|
* |
2833
|
|
|
* @see UTF8::is_binary() |
2834
|
|
|
* |
2835
|
|
|
* @param mixed $str |
2836
|
|
|
* @param bool $strict |
2837
|
|
|
* |
2838
|
|
|
* @return bool |
2839
|
|
|
* |
2840
|
|
|
* @deprecated <p>use "UTF8::is_binary()"</p> |
2841
|
|
|
*/ |
2842
|
4 |
|
public static function isBinary($str, $strict = false): bool
{
    // Deprecated camelCase alias; both arguments are forwarded unchanged
    // to is_binary().
    $result = self::is_binary($str, $strict);

    return $result;
}
2846
|
|
|
|
2847
|
|
|
/** |
2848
|
|
|
* alias for "UTF8::is_bom()" |
2849
|
|
|
* |
2850
|
|
|
* @see UTF8::is_bom() |
2851
|
|
|
* |
2852
|
|
|
* @param string $utf8_chr |
2853
|
|
|
* |
2854
|
|
|
* @return bool |
2855
|
|
|
* |
2856
|
|
|
* @deprecated <p>use "UTF8::is_bom()"</p> |
2857
|
|
|
*/ |
2858
|
2 |
|
public static function isBom(string $utf8_chr): bool
{
    // Deprecated camelCase alias; all work happens in is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
}
2862
|
|
|
|
2863
|
|
|
/** |
2864
|
|
|
* alias for "UTF8::is_html()" |
2865
|
|
|
* |
2866
|
|
|
* @see UTF8::is_html() |
2867
|
|
|
* |
2868
|
|
|
* @param string $str |
2869
|
|
|
* |
2870
|
|
|
* @return bool |
2871
|
|
|
* |
2872
|
|
|
* @deprecated <p>use "UTF8::is_html()"</p> |
2873
|
|
|
*/ |
2874
|
2 |
|
public static function isHtml(string $str): bool
{
    // Deprecated camelCase alias; all work happens in is_html().
    $result = self::is_html($str);

    return $result;
}
2878
|
|
|
|
2879
|
|
|
/** |
2880
|
|
|
* alias for "UTF8::is_json()" |
2881
|
|
|
* |
2882
|
|
|
* @see UTF8::is_json() |
2883
|
|
|
* |
2884
|
|
|
* @param string $str |
2885
|
|
|
* |
2886
|
|
|
* @return bool |
2887
|
|
|
* |
2888
|
|
|
* @deprecated <p>use "UTF8::is_json()"</p> |
2889
|
|
|
*/ |
2890
|
|
|
public static function isJson(string $str): bool
{
    // Deprecated camelCase alias; all work happens in is_json().
    $result = self::is_json($str);

    return $result;
}
2894
|
|
|
|
2895
|
|
|
/** |
2896
|
|
|
* alias for "UTF8::is_utf16()" |
2897
|
|
|
* |
2898
|
|
|
* @see UTF8::is_utf16() |
2899
|
|
|
* |
2900
|
|
|
* @param mixed $str |
2901
|
|
|
* |
2902
|
|
|
* @return false|int |
2903
|
|
|
* <strong>false</strong> if it's not UTF-16,<br> |
2904
|
|
|
* <strong>1</strong> for UTF-16LE,<br> |
2905
|
|
|
* <strong>2</strong> for UTF-16BE |
2906
|
|
|
* |
2907
|
|
|
* @deprecated <p>use "UTF8::is_utf16()"</p> |
2908
|
|
|
*/ |
2909
|
2 |
|
public static function isUtf16($str)
{
    // Deprecated camelCase alias; is_utf16() is called with its default
    // second argument and its result is passed through as-is.
    $result = self::is_utf16($str);

    return $result;
}
2913
|
|
|
|
2914
|
|
|
/** |
2915
|
|
|
* alias for "UTF8::is_utf32()" |
2916
|
|
|
* |
2917
|
|
|
* @see UTF8::is_utf32() |
2918
|
|
|
* |
2919
|
|
|
* @param mixed $str |
2920
|
|
|
* |
2921
|
|
|
* @return false|int |
2922
|
|
|
* <strong>false</strong> if it's not UTF-32, |
2923
|
|
|
* <strong>1</strong> for UTF-32LE, |
2924
|
|
|
* <strong>2</strong> for UTF-32BE |
2925
|
|
|
* |
2926
|
|
|
* @deprecated <p>use "UTF8::is_utf32()"</p> |
2927
|
|
|
*/ |
2928
|
2 |
|
public static function isUtf32($str)
{
    // Deprecated camelCase alias; is_utf32() is called with its default
    // second argument and its result is passed through as-is.
    $result = self::is_utf32($str);

    return $result;
}
2932
|
|
|
|
2933
|
|
|
/** |
2934
|
|
|
* alias for "UTF8::is_utf8()" |
2935
|
|
|
* |
2936
|
|
|
* @see UTF8::is_utf8() |
2937
|
|
|
* |
2938
|
|
|
* @param string $str |
2939
|
|
|
* @param bool $strict |
2940
|
|
|
* |
2941
|
|
|
* @return bool |
2942
|
|
|
* |
2943
|
|
|
* @deprecated <p>use "UTF8::is_utf8()"</p> |
2944
|
|
|
*/ |
2945
|
17 |
|
public static function isUtf8($str, $strict = false): bool
{
    // Deprecated camelCase alias; both arguments are forwarded unchanged
    // to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
}
2949
|
|
|
|
2950
|
|
|
/** |
2951
|
|
|
* Returns true if the string contains only alphabetic chars, false otherwise. |
2952
|
|
|
* |
2953
|
|
|
* @param string $str |
2954
|
|
|
* |
2955
|
|
|
* @return bool |
2956
|
|
|
* Whether or not $str contains only alphabetic chars |
2957
|
|
|
*/ |
2958
|
10 |
|
public static function is_alpha(string $str): bool
{
    // Anchored POSIX class; the "*" quantifier lets the empty string match.
    $pattern = '^[[:alpha:]]*$';

    return self::str_matches_pattern($str, $pattern);
}
2962
|
|
|
|
2963
|
|
|
/** |
2964
|
|
|
* Returns true if the string contains only alphabetic and numeric chars, false otherwise. |
2965
|
|
|
* |
2966
|
|
|
* @param string $str |
2967
|
|
|
* |
2968
|
|
|
* @return bool |
2969
|
|
|
* Whether or not $str contains only alphanumeric chars |
2970
|
|
|
*/ |
2971
|
13 |
|
public static function is_alphanumeric(string $str): bool
{
    // Anchored POSIX class; the "*" quantifier lets the empty string match.
    $pattern = '^[[:alnum:]]*$';

    return self::str_matches_pattern($str, $pattern);
}
2975
|
|
|
|
2976
|
|
|
/** |
2977
|
|
|
* Checks if a string is 7 bit ASCII. |
2978
|
|
|
* |
2979
|
|
|
* @param string $str <p>The string to check.</p> |
2980
|
|
|
* |
2981
|
|
|
* @return bool |
2982
|
|
|
* <strong>true</strong> if it is ASCII<br> |
2983
|
|
|
* <strong>false</strong> otherwise |
2984
|
|
|
*/ |
2985
|
201 |
|
public static function is_ascii(string $str): bool
{
    // An empty string contains nothing outside the accepted range.
    if ($str === '') {
        return true;
    }

    // Accepted bytes: 0x09, 0x0A, 0x0D, 0x10, 0x13 and the printable range
    // 0x20-0x7E. Any other byte makes the string "not ASCII".
    // NOTE(review): 0x10 and 0x13 are DLE/DC3; they look like the decimal
    // values of LF/CR (already covered by 0x0A/0x0D) - confirm before
    // tightening, since callers may rely on the current behavior.
    $foundOtherByte = \preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$foundOtherByte;
}
2993
|
|
|
|
2994
|
|
|
/** |
2995
|
|
|
* Returns true if the string is base64 encoded, false otherwise. |
2996
|
|
|
* |
2997
|
|
|
* @param string $str <p>The input string.</p> |
2998
|
|
|
* |
2999
|
|
|
* @return bool whether or not $str is base64 encoded |
3000
|
|
|
*/ |
3001
|
9 |
|
public static function is_base64($str): bool
{
    // Only non-empty strings can be base64 encoded data.
    if (\is_string($str) === false || $str === '') {
        return false;
    }

    // Strict mode: base64_decode() returns false when the input contains
    // characters outside the base64 alphabet.
    $decoded = \base64_decode($str, true);

    // Compare against false / '' explicitly. A plain truthiness test would
    // also reject the decoded payload "0" (input 'MA=='), because the
    // string "0" is falsy in PHP.
    if ($decoded === false || $decoded === '') {
        return false;
    }

    // The input counts as base64 only if re-encoding the decoded bytes
    // reproduces it exactly.
    return \base64_encode($decoded) === $str;
}
3015
|
|
|
|
3016
|
|
|
/** |
3017
|
|
|
* Check if the input is binary (heuristic; it looks like a hack). |
3018
|
|
|
* |
3019
|
|
|
* @param mixed $input |
3020
|
|
|
* @param bool $strict |
3021
|
|
|
* |
3022
|
|
|
* @return bool |
3023
|
|
|
*/ |
3024
|
39 |
|
public static function is_binary($input, bool $strict = false): bool
{
    $bytes = (string) $input;

    // Nothing to inspect.
    if ($bytes === '') {
        return false;
    }

    // A string made up solely of the characters "0" and "1" is treated as binary.
    if (\preg_match('~^[01]+$~', $bytes)) {
        return true;
    }

    // Ask the file-type helper; it reports the type under the "type" key.
    $fileType = self::get_file_type($bytes);
    if ($fileType['type'] === 'binary') {
        return true;
    }

    // Heuristic: a ratio of NUL bytes above 0.256 is treated as binary.
    $byteLength = self::strlen_in_byte($bytes);
    if ($byteLength) {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        $nulCount = self::substr_count_in_byte($bytes, "\x0", 0, $byteLength);
        if (($nulCount / $byteLength) > 0.256) {
            return true;
        }
    }

    // The remaining check runs in strict mode only.
    if ($strict !== true) {
        return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['finfo'] === false) {
        throw new \RuntimeException('ext-fileinfo: is not installed');
    }

    // Let fileinfo name the MIME encoding of the buffer.
    /** @noinspection PhpComposerExtensionStubsInspection */
    $detector = new \finfo(\FILEINFO_MIME_ENCODING);
    $detectedEncoding = $detector->buffer($bytes);

    return $detectedEncoding === 'binary';
}
3071
|
|
|
|
3072
|
|
|
/** |
3073
|
|
|
* Check if the file is binary. |
3074
|
|
|
* |
3075
|
|
|
* @param string $file |
3076
|
|
|
* |
3077
|
|
|
* @return bool |
3078
|
|
|
*/ |
3079
|
6 |
|
public static function is_binary_file($file): bool
{
    // Read at most the first 512 bytes; $head stays '' when the file
    // cannot be opened.
    $head = '';

    $handle = \fopen($file, 'rb');
    if (\is_resource($handle)) {
        $head = \fread($handle, 512);
        \fclose($handle);
    }

    // An empty head is never reported as binary; otherwise the strict
    // binary check decides.
    return $head !== '' && self::is_binary($head, true);
}
3096
|
|
|
|
3097
|
|
|
/** |
3098
|
|
|
* Returns true if the string contains only whitespace chars, false otherwise. |
3099
|
|
|
* |
3100
|
|
|
* @param string $str |
3101
|
|
|
* |
3102
|
|
|
* @return bool |
3103
|
|
|
* Whether or not $str contains only whitespace characters |
3104
|
|
|
*/ |
3105
|
15 |
|
public static function is_blank(string $str): bool
{
    // Anchored POSIX class; the "*" quantifier lets the empty string match.
    $pattern = '^[[:space:]]*$';

    return self::str_matches_pattern($str, $pattern);
}
3109
|
|
|
|
3110
|
|
|
/** |
3111
|
|
|
* Checks if the given string is equal to any "Byte Order Mark". |
3112
|
|
|
* |
3113
|
|
|
* WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string. |
3114
|
|
|
* |
3115
|
|
|
* @param string $str <p>The input string.</p> |
3116
|
|
|
* |
3117
|
|
|
* @return bool |
3118
|
|
|
* <strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise |
3119
|
|
|
*/ |
3120
|
2 |
|
public static function is_bom($str): bool
{
    // The BOM table is keyed by the BOM byte strings; the input has to be
    // strictly identical (===) to one of those keys.
    $knownBoms = \array_keys(self::$BOM);

    return \in_array($str, $knownBoms, true);
}
3130
|
|
|
|
3131
|
|
|
/** |
3132
|
|
|
* Determine whether the string is considered to be empty. |
3133
|
|
|
* |
3134
|
|
|
* A variable is considered empty if it does not exist or if its value equals FALSE. |
3135
|
|
|
* empty() does not generate a warning if the variable does not exist. |
3136
|
|
|
* |
3137
|
|
|
* @param mixed $str |
3138
|
|
|
* |
3139
|
|
|
* @return bool whether or not $str is empty() |
3140
|
|
|
*/ |
3141
|
|
|
public static function is_empty($str): bool
{
    // For a variable that is set, empty() is the negation of its boolean
    // value - so '', '0', 0, 0.0, null, false and [] all count as empty.
    return !$str;
}
3145
|
|
|
|
3146
|
|
|
/** |
3147
|
|
|
* Returns true if the string contains only hexadecimal chars, false otherwise. |
3148
|
|
|
* |
3149
|
|
|
* @param string $str |
3150
|
|
|
* |
3151
|
|
|
* @return bool |
3152
|
|
|
* Whether or not $str contains only hexadecimal chars |
3153
|
|
|
*/ |
3154
|
13 |
|
public static function is_hexadecimal(string $str): bool
{
    // Anchored POSIX class; the "*" quantifier lets the empty string match.
    $pattern = '^[[:xdigit:]]*$';

    return self::str_matches_pattern($str, $pattern);
}
3158
|
|
|
|
3159
|
|
|
/** |
3160
|
|
|
* Check if the string contains any html-tags <lall>. |
3161
|
|
|
* |
3162
|
|
|
* @param string $str <p>The input string.</p> |
3163
|
|
|
* |
3164
|
|
|
* @return bool |
3165
|
|
|
*/ |
3166
|
3 |
|
public static function is_html(string $str): bool
{
    if ($str === '') {
        return false;
    }

    // Looks for one opening, closing or self-closing tag, optionally
    // carrying attributes (quoted or unquoted values).
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // preg_match() yields 1 only when a tag was found (0 = no match,
    // false = regex error).
    return \preg_match($tagPattern, $str) === 1;
}
3179
|
|
|
|
3180
|
|
|
/** |
3181
|
|
|
* Try to check if "$str" is a JSON string. |
3182
|
|
|
* |
3183
|
|
|
* @param string $str <p>The input string.</p> |
3184
|
|
|
* |
3185
|
|
|
* @return bool |
3186
|
|
|
*/ |
3187
|
22 |
|
public static function is_json(string $str): bool
{
    if ($str === '') {
        return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['json'] === false) {
        throw new \RuntimeException('ext-json: is not installed');
    }

    $decoded = self::json_decode($str);

    // Only decoded objects and arrays qualify; scalars and null do not.
    if (\is_object($decoded) === false && \is_array($decoded) === false) {
        return false;
    }

    // ... and the decoder must not have recorded an error.
    /** @noinspection PhpComposerExtensionStubsInspection */
    return \json_last_error() === \JSON_ERROR_NONE;
}
3212
|
|
|
|
3213
|
|
|
/** |
3214
|
|
|
* @param string $str |
3215
|
|
|
* |
3216
|
|
|
* @return bool |
3217
|
|
|
*/ |
3218
|
8 |
|
public static function is_lowercase(string $str): bool
{
    // Anchored POSIX class; the "*" quantifier lets the empty string match.
    // The cast keeps the original truthiness semantics of the if/else form.
    return (bool) self::str_matches_pattern($str, '^[[:lower:]]*$');
}
3226
|
|
|
|
3227
|
|
|
/** |
3228
|
|
|
* Returns true if the string is serialized, false otherwise. |
3229
|
|
|
* |
3230
|
|
|
* @param string $str |
3231
|
|
|
* |
3232
|
|
|
* @return bool whether or not $str is serialized |
3233
|
|
|
*/ |
3234
|
7 |
|
public static function is_serialized(string $str): bool
{
    if ($str === '') {
        return false;
    }

    // "b:0;" is serialized false: unserialize() returns false for it, which
    // is indistinguishable from its failure value, so it is checked first.
    if ($str === 'b:0;') {
        return true;
    }

    // SECURITY: this is only a format probe, so no classes may be
    // instantiated - with "allowed_classes => false" objects are restored as
    // __PHP_Incomplete_Class and no magic methods of real classes run.
    // The result is still !== false for every valid payload.
    //
    // "@" is kept on purpose: unserialize() reports malformed input via a
    // notice/warning, and malformed input is an expected case here.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $value = @\unserialize($str, ['allowed_classes' => false]);

    return $value !== false;
}
3246
|
|
|
|
3247
|
|
|
/** |
3248
|
|
|
* Returns true if the string contains only upper case chars, false |
3249
|
|
|
* otherwise. |
3250
|
|
|
* |
3251
|
|
|
* @param string $str <p>The input string.</p> |
3252
|
|
|
* |
3253
|
|
|
* @return bool |
3254
|
|
|
* Whether or not $str contains only upper case characters |
3255
|
|
|
*/ |
3256
|
8 |
|
public static function is_uppercase(string $str): bool
{
    // Anchored POSIX class for upper case characters; the "*" quantifier
    // lets the empty string match.
    $pattern = '^[[:upper:]]*$';

    return self::str_matches_pattern($str, $pattern);
}
3260
|
|
|
|
3261
|
|
|
/** |
3262
|
|
|
* Check if the string is UTF-16. |
3263
|
|
|
* |
3264
|
|
|
* @param mixed $str <p>The input string.</p> |
3265
|
|
|
* @param bool $checkIfStringIsBinary |
3266
|
|
|
* |
3267
|
|
|
* @return false|int |
3268
|
|
|
* <strong>false</strong> if it's not UTF-16,<br> |
3269
|
|
|
* <strong>1</strong> for UTF-16LE,<br> |
3270
|
|
|
* <strong>2</strong> for UTF-16BE |
3271
|
|
|
*/ |
3272
|
21 |
|
public static function is_utf16($str, $checkIfStringIsBinary = true)
{
    $str = (string) $str;

    // Optional pre-filter: input that fails the strict binary check is
    // reported as "not UTF-16" right away.
    if (
        $checkIfStringIsBinary === true
        &&
        self::is_binary($str, true) === false
    ) {
        return false;
    }

    // Make sure the feature map is populated before it is read (the same
    // guard the sibling methods use).
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Score both byte orders with the same procedure: decode to UTF-8,
    // require a lossless round trip, then count the characters of the
    // decoded text that in_array() finds in count_chars($str).
    // NOTE(review): in_array() searches the VALUES returned by
    // count_chars(); whether that is the intended comparison depends on
    // that helper's return shape - kept unchanged, confirm.
    $strChars = [];
    $score = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

    foreach (\array_keys($score) as $encoding) {
        $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
        if (!$decoded) {
            continue;
        }

        $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
        $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
        if ($roundTrip !== $decoded) {
            continue;
        }

        // Computed lazily and shared by both passes.
        if (\count($strChars) === 0) {
            $strChars = self::count_chars($str, true);
        }

        foreach (\array_keys(self::count_chars($roundTrip, true)) as $char) {
            if (\in_array($char, $strChars, true) === true) {
                ++$score[$encoding];
            }
        }
    }

    // A tie (including 0:0) means no decision.
    if ($score['UTF-16LE'] === $score['UTF-16BE']) {
        return false;
    }

    // 1 = UTF-16LE, 2 = UTF-16BE
    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
}
3336
|
|
|
|
3337
|
|
|
/** |
3338
|
|
|
* Check if the string is UTF-32. |
3339
|
|
|
* |
3340
|
|
|
* @param mixed $str <p>The input string.</p> |
3341
|
|
|
* @param bool $checkIfStringIsBinary |
3342
|
|
|
* |
3343
|
|
|
* @return false|int |
3344
|
|
|
* <strong>false</strong> if it's not UTF-32,<br> |
3345
|
|
|
* <strong>1</strong> for UTF-32LE,<br> |
3346
|
|
|
* <strong>2</strong> for UTF-32BE |
3347
|
|
|
*/ |
3348
|
17 |
|
public static function is_utf32($str, $checkIfStringIsBinary = true)
{
    $str = (string) $str;

    // Optional pre-filter: input that fails the strict binary check is
    // reported as "not UTF-32" right away.
    if (
        $checkIfStringIsBinary === true
        &&
        self::is_binary($str, true) === false
    ) {
        return false;
    }

    // Make sure the feature map is populated before it is read (the same
    // guard the sibling methods use).
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
    }

    $str = self::remove_bom($str);

    // Score both byte orders with the same procedure: decode to UTF-8,
    // require a lossless round trip, then count the characters of the
    // decoded text that in_array() finds in count_chars($str).
    // NOTE(review): in_array() searches the VALUES returned by
    // count_chars(); whether that is the intended comparison depends on
    // that helper's return shape - kept unchanged, confirm.
    $strChars = [];
    $score = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

    foreach (\array_keys($score) as $encoding) {
        $decoded = \mb_convert_encoding($str, 'UTF-8', $encoding);
        if (!$decoded) {
            continue;
        }

        $reEncoded = \mb_convert_encoding($decoded, $encoding, 'UTF-8');
        $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
        if ($roundTrip !== $decoded) {
            continue;
        }

        // Computed lazily and shared by both passes.
        if (\count($strChars) === 0) {
            $strChars = self::count_chars($str, true);
        }

        foreach (\array_keys(self::count_chars($roundTrip, true)) as $char) {
            if (\in_array($char, $strChars, true) === true) {
                ++$score[$encoding];
            }
        }
    }

    // A tie (including 0:0) means no decision.
    if ($score['UTF-32LE'] === $score['UTF-32BE']) {
        return false;
    }

    // 1 = UTF-32LE, 2 = UTF-32BE
    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
}
3412
|
|
|
|
3413
|
|
|
/** |
3414
|
|
|
* Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters. |
3415
|
|
|
* |
3416
|
|
|
* @see http://hsivonen.iki.fi/php-utf8/ |
3417
|
|
|
* |
3418
|
|
|
* @param string|string[] $str <p>The string to be checked.</p> |
3419
|
|
|
* @param bool $strict <p>Check also if the string is not UTF-16 or UTF-32.</p> |
3420
|
|
|
* |
3421
|
|
|
* @return bool |
3422
|
|
|
*/ |
3423
|
106 |
|
public static function is_utf8($str, bool $strict = false): bool |
3424
|
|
|
{ |
3425
|
106 |
|
if (\is_array($str) === true) { |
3426
|
2 |
|
foreach ($str as $k => $v) { |
3427
|
2 |
|
if (self::is_utf8($v, $strict) === false) { |
3428
|
2 |
|
return false; |
3429
|
|
|
} |
3430
|
|
|
} |
3431
|
|
|
|
3432
|
|
|
return true; |
3433
|
|
|
} |
3434
|
|
|
|
3435
|
106 |
|
if ($str === '') { |
3436
|
12 |
|
return true; |
3437
|
|
|
} |
3438
|
|
|
|
3439
|
102 |
|
if ($strict === true) { |
3440
|
2 |
|
$isBinary = self::is_binary($str, true); |
3441
|
|
|
|
3442
|
2 |
|
if ($isBinary && self::is_utf16($str, false) !== false) { |
3443
|
2 |
|
return false; |
3444
|
|
|
} |
3445
|
|
|
|
3446
|
|
|
if ($isBinary && self::is_utf32($str, false) !== false) { |
3447
|
|
|
return false; |
3448
|
|
|
} |
3449
|
|
|
} |
3450
|
|
|
|
3451
|
102 |
|
if (self::pcre_utf8_support() !== true) { |
3452
|
|
|
|
3453
|
|
|
// If even just the first character can be matched, when the /u |
3454
|
|
|
// modifier is used, then it's valid UTF-8. If the UTF-8 is somehow |
3455
|
|
|
// invalid, nothing at all will match, even if the string contains |
3456
|
|
|
// some valid sequences |
3457
|
|
|
return \preg_match('/^.{1}/us', $str, $ar) === 1; |
3458
|
|
|
} |
3459
|
|
|
|
3460
|
102 |
|
$mState = 0; // cached expected number of octets after the current octet |
3461
|
|
|
// until the beginning of the next UTF8 character sequence |
3462
|
102 |
|
$mUcs4 = 0; // cached Unicode character |
3463
|
102 |
|
$mBytes = 1; // cached expected number of octets in the current sequence |
3464
|
|
|
|
3465
|
102 |
|
if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) { |
3466
|
|
|
self::checkForSupport(); |
3467
|
|
|
} |
3468
|
|
|
|
3469
|
102 |
|
if (self::$ORD === null) { |
3470
|
|
|
self::$ORD = self::getData('ord'); |
3471
|
|
|
} |
3472
|
|
|
|
3473
|
102 |
|
$len = self::strlen_in_byte((string) $str); |
3474
|
|
|
/** @noinspection ForeachInvariantsInspection */ |
3475
|
102 |
|
for ($i = 0; $i < $len; $i++) { |
3476
|
102 |
|
$in = self::$ORD[$str[$i]]; |
3477
|
102 |
|
if ($mState === 0) { |
3478
|
|
|
// When mState is zero we expect either a US-ASCII character or a |
3479
|
|
|
// multi-octet sequence. |
3480
|
102 |
|
if ((0x80 & $in) === 0) { |
3481
|
|
|
// US-ASCII, pass straight through. |
3482
|
98 |
|
$mBytes = 1; |
3483
|
83 |
|
} elseif ((0xE0 & $in) === 0xC0) { |
3484
|
|
|
// First octet of 2 octet sequence. |
3485
|
74 |
|
$mUcs4 = $in; |
3486
|
74 |
|
$mUcs4 = ($mUcs4 & 0x1F) << 6; |
3487
|
74 |
|
$mState = 1; |
3488
|
74 |
|
$mBytes = 2; |
3489
|
58 |
|
} elseif ((0xF0 & $in) === 0xE0) { |
3490
|
|
|
// First octet of 3 octet sequence. |
3491
|
41 |
|
$mUcs4 = $in; |
3492
|
41 |
|
$mUcs4 = ($mUcs4 & 0x0F) << 12; |
3493
|
41 |
|
$mState = 2; |
3494
|
41 |
|
$mBytes = 3; |
3495
|
30 |
|
} elseif ((0xF8 & $in) === 0xF0) { |
3496
|
|
|
// First octet of 4 octet sequence. |
3497
|
19 |
|
$mUcs4 = $in; |
3498
|
19 |
|
$mUcs4 = ($mUcs4 & 0x07) << 18; |
3499
|
19 |
|
$mState = 3; |
3500
|
19 |
|
$mBytes = 4; |
3501
|
13 |
|
} elseif ((0xFC & $in) === 0xF8) { |
3502
|
|
|
/* First octet of 5 octet sequence. |
3503
|
|
|
* |
3504
|
|
|
* This is illegal because the encoded codepoint must be either |
3505
|
|
|
* (a) not the shortest form or |
3506
|
|
|
* (b) outside the Unicode range of 0-0x10FFFF. |
3507
|
|
|
* Rather than trying to resynchronize, we will carry on until the end |
3508
|
|
|
* of the sequence and let the later error handling code catch it. |
3509
|
|
|
*/ |
3510
|
5 |
|
$mUcs4 = $in; |
3511
|
5 |
|
$mUcs4 = ($mUcs4 & 0x03) << 24; |
3512
|
5 |
|
$mState = 4; |
3513
|
5 |
|
$mBytes = 5; |
3514
|
10 |
|
} elseif ((0xFE & $in) === 0xFC) { |
3515
|
|
|
// First octet of 6 octet sequence, see comments for 5 octet sequence. |
3516
|
5 |
|
$mUcs4 = $in; |
3517
|
5 |
|
$mUcs4 = ($mUcs4 & 1) << 30; |
3518
|
5 |
|
$mState = 5; |
3519
|
5 |
|
$mBytes = 6; |
3520
|
|
|
} else { |
3521
|
|
|
// Current octet is neither in the US-ASCII range nor a legal first |
3522
|
|
|
// octet of a multi-octet sequence. |
3523
|
102 |
|
return false; |
3524
|
|
|
} |
3525
|
|
|
} else { |
3526
|
|
|
// When mState is non-zero, we expect a continuation of the multi-octet |
3527
|
|
|
// sequence |
3528
|
83 |
|
if ((0xC0 & $in) === 0x80) { |
3529
|
|
|
// Legal continuation. |
3530
|
75 |
|
$shift = ($mState - 1) * 6; |
3531
|
75 |
|
$tmp = $in; |
3532
|
75 |
|
$tmp = ($tmp & 0x0000003F) << $shift; |
3533
|
75 |
|
$mUcs4 |= $tmp; |
3534
|
|
|
// Prefix: End of the multi-octet sequence. mUcs4 now contains the final |
3535
|
|
|
// Unicode code point to be output. |
3536
|
75 |
|
if (--$mState === 0) { |
3537
|
|
|
// Check for illegal sequences and code points. |
3538
|
|
|
// |
3539
|
|
|
// From Unicode 3.1, non-shortest form is illegal |
3540
|
|
|
if ( |
3541
|
75 |
|
($mBytes === 2 && $mUcs4 < 0x0080) |
3542
|
|
|
|| |
3543
|
75 |
|
($mBytes === 3 && $mUcs4 < 0x0800) |
3544
|
|
|
|| |
3545
|
75 |
|
($mBytes === 4 && $mUcs4 < 0x10000) |
3546
|
|
|
|| |
3547
|
75 |
|
($mBytes > 4) |
3548
|
|
|
|| |
3549
|
|
|
// From Unicode 3.2, surrogate characters are illegal. |
3550
|
75 |
|
(($mUcs4 & 0xFFFFF800) === 0xD800) |
3551
|
|
|
|| |
3552
|
|
|
// Code points outside the Unicode range are illegal. |
3553
|
75 |
|
($mUcs4 > 0x10FFFF) |
3554
|
|
|
) { |
3555
|
8 |
|
return false; |
3556
|
|
|
} |
3557
|
|
|
// initialize UTF8 cache |
3558
|
75 |
|
$mState = 0; |
3559
|
75 |
|
$mUcs4 = 0; |
3560
|
75 |
|
$mBytes = 1; |
3561
|
|
|
} |
3562
|
|
|
} else { |
3563
|
|
|
// ((0xC0 & (*in) != 0x80) && (mState != 0)) |
3564
|
|
|
// Incomplete multi-octet sequence. |
3565
|
36 |
|
return false; |
3566
|
|
|
} |
3567
|
|
|
} |
3568
|
|
|
} |
3569
|
|
|
|
3570
|
66 |
|
return true; |
3571
|
|
|
} |
3572
|
|
|
|
3573
|
|
|
/**
 * Decodes a JSON string.
 *
 * The input is first passed through UTF8::filter(); the filtered value and the
 * remaining arguments are then forwarded unchanged to PHP's native json_decode().
 *
 * @see http://php.net/manual/en/function.json-decode.php
 *
 * @param string $json    <p>The <i>json</i> string being decoded (expected to be UTF-8 encoded).</p>
 * @param bool   $assoc   [optional] <p>When <b>TRUE</b>, returned objects will be converted into
 *                        associative arrays.</p>
 * @param int    $depth   [optional] <p>User specified recursion depth.</p>
 * @param int    $options [optional] <p>Bitmask of JSON decode options, e.g. <b>JSON_BIGINT_AS_STRING</b>.</p>
 *
 * @throws \RuntimeException if the support table reports that ext-json is not installed
 *
 * @return mixed
 *               The value encoded in <i>json</i> in appropriate PHP type, exactly as returned by
 *               the native json_decode() (<b>NULL</b> if the <i>json</i> cannot be decoded or if
 *               the encoded data is deeper than the recursion limit)
 */
public static function json_decode(string $json, bool $assoc = false, int $depth = 512, int $options = 0)
{
    $json = self::filter($json);

    // lazily populate the support table on first use
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['json'] !== false) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($json, $assoc, $depth, $options);
    }

    throw new \RuntimeException('ext-json: is not installed');
}
3623
|
|
|
|
3624
|
|
|
/**
 * Returns the JSON representation of a value.
 *
 * The value is first passed through UTF8::filter(); the filtered value and the
 * remaining arguments are then forwarded unchanged to PHP's native json_encode().
 *
 * @see http://php.net/manual/en/function.json-encode.php
 *
 * @param mixed $value   <p>The <i>value</i> being encoded. Can be any type except a resource.
 *                       All string data must be UTF-8 encoded.</p>
 * @param int   $options [optional] <p>Bitmask of the native JSON encode constants, e.g.
 *                       <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>,
 *                       <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
 *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>,
 *                       <b>JSON_UNESCAPED_UNICODE</b>.</p>
 * @param int   $depth   [optional] <p>Set the maximum depth. Must be greater than zero.</p>
 *
 * @throws \RuntimeException if the support table reports that ext-json is not installed
 *
 * @return false|string
 *                      A JSON encoded <strong>string</strong> on success or<br>
 *                      <strong>FALSE</strong> on failure
 */
public static function json_encode($value, int $options = 0, int $depth = 512)
{
    $value = self::filter($value);

    // lazily populate the support table on first use
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['json'] !== false) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($value, $options, $depth);
    }

    throw new \RuntimeException('ext-json: is not installed');
}
3677
|
|
|
|
3678
|
|
|
/**
 * Checks whether JSON is available on the server.
 *
 * Availability is detected by probing for the native json_decode() function.
 *
 * @return bool
 *              <strong>true</strong> if available, <strong>false</strong> otherwise
 */
public static function json_loaded(): bool
{
    $isAvailable = \function_exists('json_decode');

    return $isAvailable;
}
3688
|
|
|
|
3689
|
|
|
/**
 * Makes string's first char lowercase.
 *
 * The string is split into its first character and the remainder via UTF8::substr();
 * only the first character is passed through UTF8::strtolower().
 *
 * @param string      $str                   <p>The input string</p>
 * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string the resulting string
 */
public static function lcfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
{
    // everything after the first character; substr() may report failure with false
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $firstChar = (string) self::substr($str, 0, 1, $encoding, $cleanUtf8);

    $loweredFirstChar = self::strtolower($firstChar, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $loweredFirstChar . ($remainder === false ? '' : $remainder);
}
3717
|
|
|
|
3718
|
|
|
/**
 * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
 *
 * @see UTF8::lcfirst()
 *
 * @param string      $str
 * @param string      $encoding
 * @param bool        $cleanUtf8
 * @param string|null $lang
 * @param bool        $tryToKeepStringLength
 *
 * @return string
 */
public static function lcword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
{
    $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $lowered;
}
3740
|
|
|
|
3741
|
|
|
/**
 * Lowercases the first character of every word in the string.
 *
 * The string is split with UTF8::str_to_words(), UTF8::lcfirst() is applied to each
 * non-empty piece that is not listed in $exceptions, and the pieces are joined back
 * together without any separator.
 *
 * @param string      $str                   <p>The input string.</p>
 * @param string[]    $exceptions            [optional] <p>Exclusion for some words (compared strictly).</p>
 * @param string      $charlist              [optional] <p>Additional chars that contains to words and do not start
 *                                           a new word.</p>
 * @param string      $encoding              [optional] <p>Set the charset.</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string
 */
public static function lcwords(
    string $str,
    array $exceptions = [],
    string $charlist = '',
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    // compare against '' explicitly: a loose check would also discard the string "0"
    if ($str === '') {
        return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {
        $word = (string) $word;

        // skip empty pieces only; "0" is falsy in PHP but must be kept in the output
        if ($word === '') {
            continue;
        }

        if ($useExceptions === false || !\in_array($word, $exceptions, true)) {
            $word = self::lcfirst($word, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
        }

        $newWords[] = $word;
    }

    return \implode('', $newWords);
}
3799
|
|
|
|
3800
|
|
|
/**
 * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
 *
 * @see UTF8::lcfirst()
 *
 * @param string      $str
 * @param string      $encoding
 * @param bool        $cleanUtf8
 * @param string|null $lang
 * @param bool        $tryToKeepStringLength
 *
 * @return string
 */
public static function lowerCaseFirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
{
    $lowered = self::lcfirst($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    return $lowered;
}
3822
|
|
|
|
3823
|
|
|
/**
 * Strip whitespace or other characters from beginning of a UTF-8 string.
 *
 * Without an explicit character list, leading characters of the Unicode categories
 * "Z" (separators) and "C" (other / control) are removed. With a character list, the
 * list is escaped via preg_quote() and used as a character class.
 *
 * @param string $str   <p>The string to be trimmed</p>
 * @param mixed  $chars <p>Optional characters to be stripped. \INF or any empty value other than
 *                      the string "0" selects the default behaviour.</p>
 *
 * @return string the string with unwanted characters stripped from the left
 */
public static function ltrim(string $str = '', $chars = \INF): string
{
    if ($str === '') {
        return '';
    }

    // The string "0" is falsy in PHP but is a legitimate character to strip,
    // so it must not fall back to the default pattern.
    $useDefaultPattern = $chars === \INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultPattern) {
        $pattern = '^[\pZ\pC]+';
    } else {
        $chars = \preg_quote((string) $chars, '/');
        // "{$var}" instead of "${var}": the latter form is deprecated since PHP 8.2
        $pattern = "^[{$chars}]+";
    }

    return self::regex_replace($str, $pattern, '', '', '/');
}
3847
|
|
|
|
3848
|
|
|
/**
 * Returns the UTF-8 character with the maximum code point in the given data.
 *
 * An array argument is concatenated into a single string before its code points
 * are collected via UTF8::codepoints().
 *
 * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
 *
 * @return string|null the character with the highest code point than others, returns null on failure or empty input
 */
public static function max($arg)
{
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);

    // no code points at all => nothing to compare
    return \count($codepoints) === 0
        ? null
        : self::chr(\max($codepoints));
}
3870
|
|
|
|
3871
|
|
|
/**
 * Calculates and returns the maximum number of bytes taken by any
 * UTF-8 encoded character in the given string.
 *
 * The per-character sizes come from UTF8::chr_size_list().
 *
 * @param string $str <p>The original Unicode string.</p>
 *
 * @return int max byte lengths of the given chars, 0 if the size list is empty
 */
public static function max_chr_width(string $str): int
{
    $sizes = self::chr_size_list($str);

    if (\count($sizes) === 0) {
        return 0;
    }

    return (int) \max($sizes);
}
3888
|
|
|
|
3889
|
|
|
/**
 * Checks whether mbstring is available on the server.
 *
 * Side effect: when the extension is loaded, the mbstring internal encoding
 * is switched to UTF-8.
 *
 * @return bool
 *              <strong>true</strong> if available, <strong>false</strong> otherwise
 */
public static function mbstring_loaded(): bool
{
    $isLoaded = \extension_loaded('mbstring');

    if ($isLoaded) {
        \mb_internal_encoding('UTF-8');
    }

    return $isLoaded;
}
3905
|
|
|
|
3906
|
|
|
/**
 * Checks whether mbstring "overloaded" is active on the server.
 *
 * INFO: the INI directive 'mbstring.func_overload' is deprecated since PHP 7.2, so the
 * MB_OVERLOAD_STRING constant may not exist at all; in that case overloading
 * cannot be active.
 *
 * @return bool
 *              <strong>true</strong> if the MB_OVERLOAD_STRING bit is set in 'mbstring.func_overload',<br>
 *              <strong>false</strong> otherwise
 */
private static function mbstring_overloaded(): bool
{
    if (!\defined('MB_OVERLOAD_STRING')) {
        return false;
    }

    // ini_get() yields a string, or false for an unknown directive; cast explicitly
    // instead of relying on error suppression and implicit coercion in the bitwise AND
    $funcOverload = (int) \ini_get('mbstring.func_overload');

    /** @noinspection PhpComposerExtensionStubsInspection */
    return ($funcOverload & \MB_OVERLOAD_STRING) !== 0;
}
3923
|
|
|
|
3924
|
|
|
/**
 * Returns the UTF-8 character with the minimum code point in the given data.
 *
 * An array argument is concatenated into a single string before its code points
 * are collected via UTF8::codepoints().
 *
 * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
 *
 * @return string|null the character with the lowest code point than others, returns null on failure or empty input
 */
public static function min($arg)
{
    $input = \is_array($arg) === true ? \implode('', $arg) : $arg;

    $codepoints = self::codepoints($input, false);

    // no code points at all => nothing to compare
    return \count($codepoints) === 0
        ? null
        : self::chr(\min($codepoints));
}
3946
|
|
|
|
3947
|
|
|
/**
 * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
 *
 * @see UTF8::normalize_encoding()
 *
 * @param mixed $encoding
 * @param mixed $fallback
 *
 * @return mixed
 *
 * @deprecated <p>use "UTF8::normalize_encoding()"</p>
 */
public static function normalizeEncoding($encoding, $fallback = '')
{
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
}
3963
|
|
|
|
3964
|
|
|
/**
 * Normalize the encoding-"name" input.
 *
 * Resolution order: empty-like input returns the fallback, a few very common names
 * are answered directly, then the per-request cache, then the list of known
 * encodings, and finally an alias table that is matched against the upper-cased
 * name with all punctuation removed. Unknown names are returned upper-cased.
 *
 * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
 * @param mixed $fallback <p>e.g.: UTF-8</p>
 *
 * @return mixed e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)
 */
public static function normalize_encoding($encoding, $fallback = '')
{
    static $STATIC_NORMALIZE_ENCODING_CACHE = [];

    // init
    $encoding = (string) $encoding;

    // '', '0' and '1' are what null / false / true turn into after the string cast
    // (only a fallback, for non "strict_types" usage ...)
    if ($encoding === '' || $encoding === '0' || $encoding === '1') {
        return $fallback;
    }

    // very common names that are answered without touching the cache
    $fastPath = [
        'UTF-8'         => 'UTF-8',
        'UTF8'          => 'UTF-8',
        '8BIT'          => 'CP850',
        'BINARY'        => 'CP850',
        'HTML'          => 'HTML-ENTITIES',
        'HTML-ENTITIES' => 'HTML-ENTITIES',
    ];
    if (isset($fastPath[$encoding])) {
        return $fastPath[$encoding];
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
        return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    if (self::$ENCODINGS === null) {
        self::$ENCODINGS = self::getData('encodings');
    }

    // already a known encoding name: keep it exactly as given
    if (\in_array($encoding, self::$ENCODINGS, true)) {
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

        return $encoding;
    }

    $originalName = $encoding;
    $encoding = \strtoupper($encoding);
    // alias lookup key: the upper-cased name reduced to letters, digits and whitespace
    $aliasKey = \preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    $equivalences = [
        // ISO-8859-1: Western European
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        // ISO-8859-2: Central European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2',
        // ISO-8859-3: Southern European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3',
        // ISO-8859-4: Northern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4',
        // ISO-8859-5: Cyrillic
        'ISO88595'    => 'ISO-8859-5',
        // ISO-8859-6: Arabic
        'ISO88596'    => 'ISO-8859-6',
        // ISO-8859-7: Greek
        'ISO88597'    => 'ISO-8859-7',
        // ISO-8859-8: Hebrew
        'ISO88598'    => 'ISO-8859-8',
        // ISO-8859-9: Turkish
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9',
        // ISO-8859-11: Thai
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11',
        // ISO-8859-10: Nordic
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10',
        // ISO-8859-13: Baltic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13',
        // ISO-8859-14: Celtic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14',
        // ISO-8859-15: Western European (with some extra chars e.g. €)
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15',
        // ISO-8859-16: Southeast European
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16',
        // Windows code pages
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        // Unicode transformation formats
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        // raw bytes
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    ];

    // every table value is a non-empty string, so isset() is sufficient here
    if (isset($equivalences[$aliasKey])) {
        $encoding = $equivalences[$aliasKey];
    }

    // cache under the name the caller actually used
    $STATIC_NORMALIZE_ENCODING_CACHE[$originalName] = $encoding;

    return $encoding;
}
4105
|
|
|
|
4106
|
|
|
/**
 * Standardize line ending to unix-like.
 *
 * Windows ("\r\n") and lone carriage-return ("\r") line endings are both
 * replaced by a single line feed ("\n").
 *
 * @param string $str
 *
 * @return string
 */
public static function normalize_line_ending(string $str): string
{
    // strtr() tries the longest key first, so "\r\n" collapses to ONE "\n"
    // before any remaining lone "\r" is translated.
    $lineEndings = [
        "\r\n" => "\n",
        "\r"   => "\n",
    ];

    return (string) \strtr($str, $lineEndings);
}
4117
|
|
|
|
4118
|
|
|
/**
 * Normalize some MS Word special characters.
 *
 * Replacement pairs are taken from the "utf8_msword" data table: every key found in
 * the string is replaced by the corresponding value. The key and value lists are
 * built once and then reused for all later calls.
 *
 * @param string $str <p>The string to be normalized.</p>
 *
 * @return string
 */
public static function normalize_msword(string $str): string
{
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if ($str === '') {
        return '';
    }

    if ($UTF8_MSWORD_KEYS_CACHE === null) {
        // lazily load the replacement table on first use
        if (self::$UTF8_MSWORD === null) {
            self::$UTF8_MSWORD = self::getData('utf8_msword');
        }

        $UTF8_MSWORD_KEYS_CACHE = \array_keys(self::$UTF8_MSWORD);
        $UTF8_MSWORD_VALUES_CACHE = \array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);
}
4145
|
|
|
|
4146
|
|
|
/**
 * Normalize the whitespace.
 *
 * Every entry of the whitespace table found in the string is replaced by a plain
 * space; unless requested otherwise, the entries of the bidirectional-control table
 * are removed first. The flattened tables are cached between calls.
 *
 * @param string $str                     <p>The string to be normalized.</p>
 * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
 * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
 *                                        bidirectional text chars.</p>
 *
 * @return string
 */
public static function normalize_whitespace(string $str, bool $keepNonBreakingSpace = false, bool $keepBidiUnicodeControls = false): string
{
    static $WHITESPACE_CACHE = [];
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    if ($str === '') {
        return '';
    }

    // one cached whitespace list per value of $keepNonBreakingSpace (0 or 1)
    $cacheKey = (int) $keepNonBreakingSpace;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
        $table = self::$WHITESPACE_TABLE;

        if ($keepNonBreakingSpace === true) {
            unset($table['NO-BREAK SPACE']);
        }

        $WHITESPACE_CACHE[$cacheKey] = \array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
        if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
            $BIDI_UNICODE_CONTROLS_CACHE = \array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
        }

        $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
}
4187
|
|
|
|
4188
|
|
|
/**
 * Calculates Unicode code point of the given UTF-8 encoded character.
 *
 * INFO: opposite to UTF8::chr()
 *
 * Lookup order: the per-request result cache, the pre-computed "ord" data table,
 * IntlChar::ord() (when the support table reports it as available) and finally a
 * manual decode of the first (up to four) bytes.
 *
 * @param string $chr      <p>The character of which to calculate code point.<p/>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int
 *             Unicode code point of the given character,<br>
 *             0 on invalid UTF-8 byte sequence
 */
public static function ord($chr, string $encoding = 'UTF-8'): int
{
    static $CHAR_CACHE = [];

    // init
    $chr = (string) $chr;

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // the key is built from the unconverted input plus the normalized encoding name
    $cacheKey = $chr . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
        return $CHAR_CACHE[$cacheKey];
    }

    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    // hits from the data table are returned directly (they are not added to the cache)
    if (isset(self::$ORD[$chr])) {
        return self::$ORD[$chr];
    }

    // check again, if it's still not UTF-8
    if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $intlCode = \IntlChar::ord($chr);
        // a falsy result falls through to the manual decoding below
        if ($intlCode) {
            return $CHAR_CACHE[$cacheKey] = $intlCode;
        }
    }

    // manual decoding: unpack() yields a 1-based list of byte values
    $bytes = \unpack('C*', (string) self::substr($chr, 0, 4, 'CP850'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
        // 4-byte sequence
        /** @noinspection UnnecessaryCastingInspection */
        $code = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
        // 3-byte sequence
        /** @noinspection UnnecessaryCastingInspection */
        $code = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
        // 2-byte sequence
        /** @noinspection UnnecessaryCastingInspection */
        $code = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
    } else {
        // single byte (or nothing to decode => 0)
        $code = $lead;
    }

    return $CHAR_CACHE[$cacheKey] = $code;
}
4265
|
|
|
|
4266
|
|
|
/**
 * Parses the string into an array (into the second parameter).
 *
 * WARNING: Unlike the native "parse_str()", this method never places variables in the
 *          current scope; the result is always written to the second parameter.
 *
 * Uses mb_parse_str() when the support table reports mbstring as available and
 * falls back to the native parse_str() otherwise.
 *
 * @see http://php.net/manual/en/function.parse-str.php
 *
 * @param string $str       <p>The input string.</p>
 * @param array  $result    <p>The result will be returned into this reference parameter.</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return bool
 *              Will return <strong>false</strong> if php can't parse the string and we haven't any $result
 */
public static function parse_str(string $str, &$result, bool $cleanUtf8 = false): bool
{
    if ($cleanUtf8 === true) {
        $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] !== true) {
        /** @noinspection PhpVoidFunctionResultUsedInspection */
        \parse_str($str, $result);

        return !empty($result);
    }

    $parsed = \mb_parse_str($str, $result);

    // success requires both: no parser failure and a non-empty result
    return $parsed !== false && !empty($result);
}
4302
|
|
|
|
4303
|
|
|
/**
 * Checks if the /u modifier is available that enables Unicode support in PCRE.
 *
 * The check runs an empty pattern with the /u modifier against an empty string;
 * any warning raised by that probe is suppressed on purpose.
 *
 * @return bool
 *              <strong>true</strong> if support is available,<br>
 *              <strong>false</strong> otherwise
 */
public static function pcre_utf8_support(): bool
{
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @\preg_match('//u', '');

    // preg_match() yields 1 on a match, 0 on no match and false on error
    return $probe !== false && $probe !== 0;
}
4315
|
|
|
|
4316
|
|
|
/**
 * Creates an array containing a range of UTF-8 characters.
 *
 * Each boundary is interpreted in this order: a string of decimal digits is a decimal code point,
 * a string of hexadecimal digits is a hexadecimal code point, anything else is passed to
 * "UTF8::ord()".
 *
 * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
 * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
 *
 * @throws \RuntimeException if "ext-ctype" is not available
 *
 * @return string[]
 *                  an empty array if one of the boundaries is empty or resolves to 0
 */
public static function range($var1, $var2): array
{
    if (!$var1 || !$var2) {
        return [];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['ctype'] === false) {
        throw new \RuntimeException('ext-ctype: is not installed');
    }

    // resolves a single boundary value to its code point
    $toCodePoint = static function ($boundary) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        if (\ctype_digit((string) $boundary)) {
            return (int) $boundary;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        if (\ctype_xdigit($boundary)) {
            return (int) self::hex_to_int($boundary);
        }

        return self::ord($boundary);
    };

    // the end boundary is only resolved when the start boundary is usable
    $firstCodePoint = $toCodePoint($var1);
    if (!$firstCodePoint) {
        return [];
    }

    $lastCodePoint = $toCodePoint($var2);
    if (!$lastCodePoint) {
        return [];
    }

    $codePoints = \range($firstCodePoint, $lastCodePoint);

    return \array_map([self::class, 'chr'], $codePoints);
}
4372
|
|
|
|
4373
|
|
|
/**
 * Decodes url-encoded input and HTML entities, repeatedly if requested.
 *
 * Every pass converts the string via "UTF8::to_utf8()", decodes HTML entities, applies the native
 * "rawurldecode()" and finally "UTF8::fix_simple_utf8()". Non-standard "%uXXXX" escapes are turned
 * into hexadecimal HTML entities up front so the entity decoding can resolve them.
 *
 * e.g:
 * 'test+test'                     => 'test+test'
 * 'D%FCsseldorf'                  => 'Düsseldorf'
 * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
 * 'D%C3%BCsseldorf'               => 'Düsseldorf'
 * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
 * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
 *
 * @param string $str          <p>The input string.</p>
 * @param bool   $multi_decode <p>Decode as often as possible.</p>
 *
 * @return string
 */
public static function rawurldecode(string $str, bool $multi_decode = true): string
{
    if ($str === '') {
        return '';
    }

    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscape, $str)) {
        $str = (string) \preg_replace($unicodeEscape, '&#x\\1;', \rawurldecode($str));
    }

    $entityFlags = \ENT_QUOTES | \ENT_HTML5;

    do {
        $beforePass = $str;

        $asUtf8 = self::to_utf8($str);
        $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
        $str = self::fix_simple_utf8(\rawurldecode($withoutEntities));

        // stop after the first pass, or as soon as a pass no longer changes anything
        $decodeAgain = $multi_decode === true && $beforePass !== $str;
    } while ($decodeAgain);

    return $str;
}
4420
|
|
|
|
4421
|
|
|
/**
 * Filters an array of strings and returns the remaining values re-indexed from 0.
 *
 * @param array $strings           <p>The strings to filter.</p>
 * @param bool  $removeEmptyValues <p>Drop values that are empty after "trim()".</p>
 * @param int   $removeShortValues <p>Drop values whose length is less than or equal to this
 *                                 number; null disables the length filter.</p>
 *
 * @return array
 */
private static function reduce_string_array(array $strings, bool $removeEmptyValues, int $removeShortValues = null): array
{
    $kept = [];

    foreach ($strings as $candidate) {
        $isTooShort = $removeShortValues !== null
                      &&
                      self::strlen($candidate) <= $removeShortValues;
        if ($isTooShort) {
            continue;
        }

        $isBlank = $removeEmptyValues === true
                   &&
                   \trim($candidate) === '';
        if (!$isBlank) {
            $kept[] = $candidate;
        }
    }

    return $kept;
}
4455
|
|
|
|
4456
|
|
|
/**
 * Replaces all matches of the regular expression $pattern in $str by $replacement.
 *
 * The pattern is always compiled with the "u" (UTF-8) modifier; $options are appended to it.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $pattern     <p>The regular expression pattern (without delimiters).</p>
 * @param string $replacement <p>The string to replace with.</p>
 * @param string $options     [optional] <p>Matching conditions to be used.</p>
 * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
 *
 * @return string
 */
public static function regex_replace(string $str, string $pattern, string $replacement, string $options = '', string $delimiter = '/'): string
{
    // the option string "msr" is mapped to "ms"
    $modifiers = $options === 'msr' ? 'ms' : $options;

    // fall back to the default delimiter when an empty one was passed
    $boundary = $delimiter ?: '/';

    $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

    return (string) \preg_replace($regex, $replacement, $str);
}
4484
|
|
|
|
4485
|
|
|
/**
 * Alias of "UTF8::remove_bom()".
 *
 * @see UTF8::remove_bom()
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string the result of "UTF8::remove_bom()"
 *
 * @deprecated <p>use "UTF8::remove_bom()"</p>
 */
public static function removeBOM(string $str): string
{
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
}
4500
|
|
|
|
4501
|
|
|
/**
 * Removes the BOM from UTF-8 / UTF-16 / UTF-32 strings.
 *
 * Every entry of the internal BOM table is checked in turn, so a marker that only becomes the
 * prefix after an earlier one was stripped is removed as well.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string string without UTF-BOM
 */
public static function remove_bom(string $str): string
{
    if ($str === '') {
        return '';
    }

    $remainingBytes = self::strlen_in_byte($str);

    foreach (self::$BOM as $marker => $markerBytes) {
        if (self::strpos_in_byte($str, $marker, 0) !== 0) {
            continue;
        }

        $stripped = self::substr_in_byte($str, $markerBytes, $remainingBytes);
        if ($stripped === false) {
            return '';
        }

        $remainingBytes -= $markerBytes;
        $str = (string) $stripped;
    }

    return $str;
}
4530
|
|
|
|
4531
|
|
|
/**
 * Collapses directly repeated occurrences of a string inside another string into one occurrence.
 *
 * @param string          $str  <p>The base string.</p>
 * @param string|string[] $what <p>String(s) to search for in the base string.</p>
 *
 * @return string the result string with removed duplicates
 */
public static function remove_duplicates(string $str, $what = ' '): string
{
    $needles = \is_string($what) === true ? [$what] : $what;

    // anything that is neither a string nor an array leaves the input untouched
    if (\is_array($needles) !== true) {
        return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
        $repeated = '/(' . \preg_quote($needle, '/') . ')+/';
        $str = (string) \preg_replace($repeated, $needle, $str);
    }

    return $str;
}
4554
|
|
|
|
4555
|
|
|
/**
 * Removes HTML from the string via "strip_tags()".
 *
 * @param string $str           <p>The input string.</p>
 * @param string $allowableTags [optional] <p>Tags which should not be stripped, e.g. "&lt;b&gt;&lt;i&gt;".
 *                              Default: '' (strip every tag)</p>
 *
 * @return string
 */
public static function remove_html(string $str, string $allowableTags = ''): string
{
    $withoutTags = \strip_tags($str, $allowableTags);

    return $withoutTags;
}
4569
|
|
|
|
4570
|
|
|
/**
 * Removes all breaks [<br> | \r\n | \r | \n] from the string.
 *
 * A "\r\n" pair counts as one break, so it is replaced by exactly one $replacement. Only real
 * "br" tags (with or without attributes / self-closing slash) are matched, not other tags whose
 * name merely starts with "br".
 *
 * @param string $str         <p>The input string.</p>
 * @param string $replacement [optional] <p>Default is an empty string.</p>
 *
 * @return string
 */
public static function remove_html_breaks(string $str, string $replacement = ''): string
{
    // NOTE: the former pattern started with a stray "/", which removed a literal slash in front
    //       of "\r\n" and prevented "\r\n" from ever being matched as a single break.
    return (string) \preg_replace("#\r\n|\r|\n|<br\\b[^>]*>#i", $replacement, $str);
}
4582
|
|
|
|
4583
|
|
|
/**
 * Removes invisible (control) characters from a string.
 *
 * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
 *
 * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09) are kept. The
 * replacement is repeated until nothing matches anymore, so sequences that only appear after a
 * removal are removed as well.
 *
 * Adapted from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
 *
 * @param string $str         <p>The input string.</p>
 * @param bool   $url_encoded [optional] <p>Also remove the url-encoded form ("%00" ...) of the characters.</p>
 * @param string $replacement [optional] <p>The string that is inserted for every removed sequence.</p>
 *
 * @return string
 */
public static function remove_invisible_characters(string $str, bool $url_encoded = true, string $replacement = ''): string
{
    // init
    $non_displayables = [];

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
        // percent-encoded hex digits are case-insensitive, so "%0B" must be caught like "%0b"
        $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
        $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
        $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
        $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
}
4616
|
|
|
|
4617
|
|
|
/**
 * Returns the string without the prefix $substring; the string is returned as-is when it does
 * not start with $substring.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The prefix to remove.</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string string without the prefix $substring
 */
public static function remove_left(string $str, string $substring, string $encoding = 'UTF-8'): string
{
    if (!self::str_starts_with($str, $substring)) {
        return $str;
    }

    // skip as many characters as the prefix is long and keep the rest
    $prefixLength = self::strlen($substring, $encoding);

    return (string) self::substr($str, $prefixLength, null, $encoding);
}
4639
|
|
|
|
4640
|
|
|
/**
 * Returns the string without the suffix $substring; the string is returned as-is when it does
 * not end with $substring.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The suffix to remove.</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string string having a $str without the suffix $substring
 */
public static function remove_right(string $str, string $substring, string $encoding = 'UTF-8'): string
{
    if (self::str_ends_with($str, $substring)) {
        return (string) self::substr(
            $str,
            0,
            self::strlen($str, $encoding) - self::strlen($substring, $encoding),
            // the lengths above are measured in $encoding, so the cut must use it too
            // (same as in "UTF8::remove_left()")
            $encoding
        );
    }

    return $str;
}
4661
|
|
|
|
4662
|
|
|
/**
 * Replaces all occurrences of $search in $str by $replacement.
 *
 * @param string $str           <p>The input string.</p>
 * @param string $search        <p>The needle to search for.</p>
 * @param string $replacement   <p>The string to replace with.</p>
 * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @return string string after the replacements
 */
public static function replace(string $str, string $search, string $replacement, bool $caseSensitive = true): string
{
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
}
4680
|
|
|
|
4681
|
|
|
/**
 * Replaces all occurrences of every element of $search in $str by $replacement.
 *
 * @param string       $str           <p>The input string.</p>
 * @param array        $search        <p>The elements to search for.</p>
 * @param array|string $replacement   <p>The string(s) to replace with.</p>
 * @param bool         $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 *
 * @return string string after the replacements
 */
public static function replace_all(string $str, array $search, $replacement, bool $caseSensitive = true): string
{
    return $caseSensitive
        ? self::str_replace($search, $replacement, $str)
        : self::str_ireplace($search, $replacement, $str);
}
4699
|
|
|
|
4700
|
|
|
/**
 * Replaces the diamond question mark (�) and, optionally, invalid UTF-8 sequences with the replacement.
 *
 * @param string $str                <p>The input string</p>
 * @param string $replacementChar    <p>The replacement; an empty string removes the characters.</p>
 * @param bool   $processInvalidUtf8 <p>Also replace invalid UTF-8 sequences.</p>
 *
 * @return string
 */
public static function replace_diamond_question_mark(string $str, string $replacementChar = '', bool $processInvalidUtf8 = true): string
{
    if ($str === '') {
        return '';
    }

    if ($processInvalidUtf8 === true) {
        // "mb_substitute_character()" accepts only a code point (int) or a keyword like "none",
        // never a raw character string. Invalid sequences are therefore turned into U+FFFD here
        // and mapped to the requested replacement by the "str_replace()" below.
        $substitute = $replacementChar === '' ? 'none' : 0xFFFD;

        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['mbstring'] === false) {
            // if there is no native support for "mbstring",
            // then we need to clean the string before ...
            $str = self::clean($str);
        }

        // always fallback via symfony polyfill
        $save = \mb_substitute_character();
        \mb_substitute_character($substitute);
        $strTmp = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
        \mb_substitute_character($save);

        $str = \is_string($strTmp) ? $strTmp : '';
    }

    return \str_replace(
        [
            "\xEF\xBF\xBD",
            '�',
        ],
        [
            $replacementChar,
            $replacementChar,
        ],
        $str
    );
}
4756
|
|
|
|
4757
|
|
|
/**
 * Strips whitespace or other characters from the end of a UTF-8 string.
 *
 * Without $chars, every trailing separator (\pZ) and "other" (\pC, e.g. control) character is
 * removed. With $chars, only these characters are removed, and only from the very end of the
 * string.
 *
 * @param string $str   <p>The string to be trimmed.</p>
 * @param mixed  $chars <p>Optional characters to be stripped.</p>
 *
 * @return string the string with unwanted characters stripped from the right
 */
public static function rtrim(string $str = '', $chars = \INF): string
{
    if ($str === '') {
        return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "\z" is used instead of "$", because "$" also matches in front of a trailing newline and
    // would strip characters that are not at the end of the string.
    if ($chars === \INF || (!$chars && $chars !== '0')) {
        $pattern = '[\pZ\pC]+\z';
    } else {
        // "0" is a valid character list, so it must not be treated like "not set"
        $pattern = '[' . \preg_quote((string) $chars, '/') . ']+\z';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
}
4781
|
|
|
|
4782
|
|
|
/**
 * Builds a regex fragment (character class or alternation) from the pieces of $s.
 *
 * The pieces returned by "UTF8::str_split()" are handled like this:
 * - "-" is put in front of the character class,
 * - pieces shorter than three bytes are appended to the class via "preg_quote()",
 * - longer pieces with a length of one character are appended to the class as-is,
 * - all other pieces become separate alternatives.
 *
 * Results are cached per $s / $class combination for the lifetime of the process.
 *
 * @param string $s     <p>The characters to build the fragment from.</p>
 * @param string $class [optional] <p>Initial content of the character class.</p>
 *
 * @return string
 */
private static function rxClass(string $s, string $class = ''): string
{
    static $RX_CLASSS_CACHE = [];

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
        return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0 holds the content of the character class, further entries are alternatives
    $alternatives = [$class];

    foreach (self::str_split($s) as $piece) {
        if ($piece === '-') {
            $alternatives[0] = '-' . $alternatives[0];

            continue;
        }

        if (!isset($piece[2])) {
            $alternatives[0] .= \preg_quote($piece, '/');

            continue;
        }

        if (self::strlen($piece) === 1) {
            $alternatives[0] .= $piece;

            continue;
        }

        $alternatives[] = $piece;
    }

    if ($alternatives[0]) {
        $alternatives[0] = '[' . $alternatives[0] . ']';
    }

    $regex = \count($alternatives) === 1
        ? $alternatives[0]
        : '(?:' . \implode('|', $alternatives) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $regex;

    return $regex;
}
4830
|
|
|
|
4831
|
|
|
/**
 * WARNING: Prints (echo) the detected native UTF-8 support (libs), e.g. for debugging.
 *
 * The output is a "pre" block with one "key - value" line per entry of the support table.
 */
public static function showSupport()
{
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    $lines = '';
    foreach (self::$SUPPORT as $feature => $state) {
        $lines .= $feature . ' - ' . \print_r($state, true) . "\n<br>";
    }

    echo '<pre>' . $lines . '</pre>';
}
4846
|
|
|
|
4847
|
|
|
/**
 * Converts a single UTF-8 character into its numbered HTML entity ("&#" + code point + ";").
 *
 * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
 * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
 * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string the HTML numbered entity
 */
public static function single_chr_html_encode(string $char, bool $keepAsciiChars = false, string $encoding = 'UTF-8'): string
{
    if ($char === '') {
        return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
        return $char;
    }

    // only the two well-known names are used as-is, everything else is normalized first
    $isKnownEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isKnownEncoding) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
}
4876
|
|
|
|
4877
|
|
|
/**
 * Replaces every run of $tabLength consecutive spaces by one tab character.
 *
 * @param string $str       <p>The input string.</p>
 * @param int    $tabLength <p>Number of spaces that are replaced by one tab. Default: 4</p>
 *
 * @return string
 */
public static function spaces_to_tabs(string $str, int $tabLength = 4): string
{
    $spaces = \str_repeat(' ', $tabLength);

    return \str_replace($spaces, "\t", $str);
}
4887
|
|
|
|
4888
|
|
|
/**
 * Converts a string into an array of Unicode characters.
 *
 * For array input, every element is split on its own (with the same $length and $cleanUtf8) and
 * the array of results is returned.
 *
 * @param int|int[]|string|string[] $str       <p>The string to split into array.</p>
 * @param int                       $length    [optional] <p>Max character length of each array element.</p>
 * @param bool                      $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return string[] an array containing chunks of the string;
 *                  an empty array for an empty string or a $length below 1
 */
public static function split($str, int $length = 1, bool $cleanUtf8 = false): array
{
    if ($length <= 0) {
        return [];
    }

    if (\is_array($str) === true) {
        foreach ($str as $k => $v) {
            // pass $cleanUtf8 on, so nested values are handled like a top-level string
            $str[$k] = self::split($v, $length, $cleanUtf8);
        }

        return $str;
    }

    // init
    $str = (string) $str;

    if ($str === '') {
        return [];
    }

    // init
    $ret = [];

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
        $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {
        \preg_match_all('/./us', $str, $retArray);
        if (isset($retArray[0])) {
            $ret = $retArray[0];
        }
        unset($retArray);
    } else {
        // fallback: walk over the bytes and group them by the length announced in the lead byte;
        // sequences with missing / invalid continuation bytes are skipped

        $len = self::strlen_in_byte($str);

        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; $i++) {
            if (($str[$i] & "\x80") === "\x00") {
                // 0xxxxxxx: single byte (ASCII)
                $ret[] = $str[$i];
            } elseif (
                isset($str[$i + 1])
                &&
                ($str[$i] & "\xE0") === "\xC0"
            ) {
                // 110xxxxx: lead byte of a two byte sequence
                if (($str[$i + 1] & "\xC0") === "\x80") {
                    $ret[] = $str[$i] . $str[$i + 1];

                    $i++;
                }
            } elseif (
                isset($str[$i + 2])
                &&
                ($str[$i] & "\xF0") === "\xE0"
            ) {
                // 1110xxxx: lead byte of a three byte sequence
                if (
                    ($str[$i + 1] & "\xC0") === "\x80"
                    &&
                    ($str[$i + 2] & "\xC0") === "\x80"
                ) {
                    $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

                    $i += 2;
                }
            } elseif (
                isset($str[$i + 3])
                &&
                ($str[$i] & "\xF8") === "\xF0"
            ) {
                // 11110xxx: lead byte of a four byte sequence
                if (
                    ($str[$i + 1] & "\xC0") === "\x80"
                    &&
                    ($str[$i + 2] & "\xC0") === "\x80"
                    &&
                    ($str[$i + 3] & "\xC0") === "\x80"
                ) {
                    $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

                    $i += 3;
                }
            }
        }
    }

    if ($length > 1) {
        // glue the single characters together into chunks of $length characters
        $ret = \array_chunk($ret, $length);

        return \array_map(
            function ($item) {
                return \implode('', $item);
            },
            $ret
        );
    }

    if (isset($ret[0]) && $ret[0] === '') {
        return [];
    }

    return $ret;
}
5010
|
|
|
|
5011
|
|
|
/**
 * Returns a camelCase version of the string. Trims surrounding spaces,
 * capitalizes letters following digits, spaces, dashes and underscores,
 * and removes spaces, dashes, as well as underscores.
 *
 * @param string      $str                   <p>The input string.</p>
 * @param string      $encoding              [optional] <p>Default: UTF-8</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string
 */
public static function str_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
{
    // shared upper-casing with the options of this call
    $toUpper = function ($text) use ($encoding, $cleanUtf8, $lang, $tryToKeepStringLength) {
        return self::strtoupper($text, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);
    };

    $camelized = self::lcfirst(self::trim($str), $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    // leading dashes / underscores are dropped without capitalizing what follows
    $camelized = (string) \preg_replace('/^[-_]+/', '', $camelized);

    // separators are removed and the character right after them is upper-cased
    $camelized = (string) \preg_replace_callback(
        '/[-_\s]+(.)?/u',
        function ($match) use ($toUpper) {
            return isset($match[1]) ? $toUpper($match[1]) : '';
        },
        $camelized
    );

    // digits are kept, the character right after them is upper-cased
    return (string) \preg_replace_callback(
        '/[\d]+(.)?/u',
        function ($match) use ($toUpper) {
            return $toUpper($match[0]);
        },
        $camelized
    );
}
5051
|
|
|
|
5052
|
|
|
/**
 * Returns the string with the first letter of each word capitalized,
 * except for words that are name parts which shouldn't be capitalized.
 *
 * Whitespace is collapsed first; afterwards the parts separated by spaces and the parts
 * separated by dashes are processed.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string string with $str capitalized
 */
public static function str_capitalize_name(string $str): string
{
    $collapsed = self::collapse_whitespace($str);
    $bySpaces = self::str_capitalize_name_helper($collapsed, ' ');

    return self::str_capitalize_name_helper($bySpaces, '-');
}
5068
|
|
|
|
5069
|
|
|
/**
 * Capitalizes the parts of a personal name, e.g. for names that were typed in lowercase
 * ("marcus aurelius").
 *
 * Parts that equal a known lowercase word ("von", "de", ...) or that start with a known prefix
 * ("mc", "d'", ...) are left untouched. When splitting by "-", parts that start with a known
 * lowercase word are left untouched as well.
 *
 * @param string $names     <p>The name(s) to process.</p>
 * @param string $delimiter <p>The delimiter the name is split by (and joined with again).</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string
 */
private static function str_capitalize_name_helper(string $names, string $delimiter, string $encoding = 'UTF-8'): string
{
    $parts = \explode($delimiter, $names);

    if ($parts === false) {
        return '';
    }

    $lowercaseWords = [
        'ab',
        'af',
        'al',
        'and',
        'ap',
        'bint',
        'binte',
        'da',
        'de',
        'del',
        'den',
        'der',
        'di',
        'dit',
        'ibn',
        'la',
        'mac',
        'nic',
        'of',
        'ter',
        'the',
        'und',
        'van',
        'von',
        'y',
        'zu',
    ];

    $lowercasePrefixes = [
        'al-',
        "d'",
        'ff',
        "l'",
        'mac',
        'mc',
        'nic',
    ];

    // beginnings that keep a part untouched: the prefixes always, the words only for "-"
    $protectedBeginnings = $delimiter === '-'
        ? \array_merge($lowercaseWords, $lowercasePrefixes)
        : $lowercasePrefixes;

    foreach ($parts as $index => $part) {
        if (\in_array($part, $lowercaseWords, true)) {
            continue;
        }

        $keepAsIs = false;
        foreach ($protectedBeginnings as $beginning) {
            if (self::strpos($part, $beginning, 0, $encoding) === 0) {
                $keepAsIs = true;
            }
        }

        if (!$keepAsIs) {
            $parts[$index] = self::str_upper_first($part);
        }
    }

    return \implode($delimiter, $parts);
}
5157
|
|
|
|
5158
|
|
|
/**
 * Returns true if the string contains $needle, false otherwise. By default
 * the comparison is case-sensitive, but can be made insensitive by setting
 * $caseSensitive to false.
 *
 * An empty $haystack or $needle always results in false.
 *
 * @param string $haystack      <p>The input string.</p>
 * @param string $needle        <p>Substring to look for.</p>
 * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return bool whether or not $haystack contains $needle
 */
public static function str_contains(string $haystack, string $needle, $caseSensitive = true, string $encoding = 'UTF-8'): bool
{
    if ($haystack === '' || $needle === '') {
        return false;
    }

    // only a fallback to prevent a BC break in the api: a non-boolean third argument
    // is used as the encoding
    if (!\is_bool($caseSensitive)) {
        $encoding = (string) $caseSensitive;
    }

    $position = $caseSensitive
        ? self::strpos($haystack, $needle, 0, $encoding)
        : self::stripos($haystack, $needle, 0, $encoding);

    return $position !== false;
}
5187
|
|
|
|
5188
|
|
|
/**
 * Returns true if the string contains all $needles, false otherwise. By
 * default the comparison is case-sensitive, but can be made insensitive by
 * setting $caseSensitive to false.
 *
 * An empty $haystack or an empty list of $needles results in false.
 *
 * @param string $haystack      <p>The input string.</p>
 * @param array  $needles       <p>SubStrings to look for.</p>
 * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return bool whether or not $haystack contains all $needles
 */
public static function str_contains_all(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
{
    if ($haystack === '' || $needles === []) {
        return false;
    }

    // only a fallback to prevent a BC break in the api: a non-boolean third argument
    // is used as the encoding
    if (!\is_bool($caseSensitive)) {
        $encoding = (string) $caseSensitive;
    }

    foreach ($needles as $needle) {
        $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
        if (!$found) {
            return false;
        }
    }

    return true;
}
5223
|
|
|
|
5224
|
|
|
/**
 * Checks that $haystack contains at least one entry of $needles.
 *
 * The comparison is case-sensitive unless $caseSensitive is false. An empty
 * $needles list always yields false.
 *
 * @param string $haystack      <p>The input string.</p>
 * @param array  $needles       <p>SubStrings to look for.</p>
 * @param bool   $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return bool
 *              Whether or not $haystack contains any of the $needles
 */
public static function str_contains_any(string $haystack, array $needles, $caseSensitive = true, string $encoding = 'UTF-8'): bool
{
    if (empty($needles)) {
        return false;
    }

    // The first hit decides; remaining needles are not inspected.
    foreach ($needles as $needle) {
        $found = self::str_contains($haystack, $needle, $caseSensitive, $encoding);
        if ($found) {
            return true;
        }
    }

    return false;
}
5251
|
|
|
|
5252
|
|
|
/**
 * Returns a lowercase and trimmed string separated by dashes.
 *
 * Shortcut for self::str_delimit() with "-" as the delimiter.
 *
 * @param string $str      <p>The input string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 */
public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
{
    $dash = '-';

    return self::str_delimit($str, $dash, $encoding);
}
5266
|
|
|
|
5267
|
|
|
/**
 * Returns a lowercase and trimmed string separated by the given delimiter.
 * Delimiters are inserted before uppercase characters (with the exception
 * of the first character of the string), and in place of spaces, dashes,
 * and underscores. Alpha delimiters are not converted to lowercase.
 *
 * Uppercase detection uses the Unicode property \p{Lu}, so non-ASCII
 * capitals (e.g. "Ä") are separated as well. The delimiter is always
 * inserted literally, even if it contains "$" or "\".
 *
 * @param string      $str                   <p>The input string.</p>
 * @param string      $delimiter             <p>Sequence used to separate parts of the string.</p>
 * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt,
 *                                           tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
 *                                           ß</p>
 *
 * @return string
 */
public static function str_delimit(
    string $str,
    string $delimiter,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false
): string {
    $str = self::trim($str);

    // Mark every uppercase letter that is not at a word boundary with a dash.
    // \p{Lu} (instead of [A-Z]) also catches non-ASCII uppercase letters.
    $str = (string) \preg_replace('/\B(\p{Lu})/u', '-\1', $str);

    // Lowercase before the delimiter is inserted, so the delimiter keeps its case.
    $str = self::strtolower($str, $encoding, $cleanUtf8, $lang, $tryToKeepStringLength);

    // Escape "\" and "$" so the delimiter is never read as a back-reference.
    $replacement = \addcslashes($delimiter, '\\$');

    return (string) \preg_replace('/[-_\s]+/u', $replacement, $str);
}
5300
|
|
|
|
5301
|
|
|
/**
 * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
 *
 * Detection order: binary check (UTF-16 / UTF-32 only), ASCII, UTF-8,
 * "mb_detect_encoding()" (if mbstring is available) and finally an
 * "iconv()" round-trip over the known encodings.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return false|string
 *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
 *                      otherwise it will return false e.g. for BINARY or not detected encoding.
 */
public static function str_detect_encoding($str)
{
    $str = (string) $str;

    //
    // 1.) binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
    //

    if (self::is_binary($str, true) === true) {
        // result code of is_utf16() / is_utf32() => byte-order suffix
        $byteOrders = [1 => 'LE', 2 => 'BE'];

        $utf16 = self::is_utf16($str, false);
        if (\is_int($utf16) && isset($byteOrders[$utf16])) {
            return 'UTF-16' . $byteOrders[$utf16];
        }

        $utf32 = self::is_utf32($str, false);
        if (\is_int($utf32) && isset($byteOrders[$utf32])) {
            return 'UTF-32' . $byteOrders[$utf32];
        }

        // is binary but not "UTF-16" or "UTF-32"
        return false;
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
        return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
        return 'UTF-8';
    }

    //
    // 4.) check via "mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
        $candidates = [
            'ISO-8859-1',
            'ISO-8859-2',
            'ISO-8859-3',
            'ISO-8859-4',
            'ISO-8859-5',
            'ISO-8859-6',
            'ISO-8859-7',
            'ISO-8859-8',
            'ISO-8859-9',
            'ISO-8859-10',
            'ISO-8859-13',
            'ISO-8859-14',
            'ISO-8859-15',
            'ISO-8859-16',
            'WINDOWS-1251',
            'WINDOWS-1252',
            'WINDOWS-1254',
            'CP932',
            'CP936',
            'CP950',
            'CP866',
            'CP850',
            'CP51932',
            'CP50220',
            'CP50221',
            'CP50222',
            'ISO-2022-JP',
            'ISO-2022-KR',
            'JIS',
            'JIS-ms',
            'EUC-CN',
            'EUC-JP',
        ];

        // info: do not use the symfony polyfill here
        $detected = \mb_detect_encoding($str, $candidates, true);
        if ($detected) {
            return $detected;
        }
    }

    //
    // 5.) check via "iconv()"
    //

    if (self::$ENCODINGS === null) {
        self::$ENCODINGS = self::getData('encodings');
    }

    foreach (self::$ENCODINGS as $candidate) {
        // INFO: //IGNORE but still throw notice
        // An encoding matches if converting the string to itself leaves it unchanged.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $roundTrip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
        if ($roundTrip === $str) {
            return $candidate;
        }
    }

    return false;
}
5426
|
|
|
|
5427
|
|
|
/**
 * Check if the string ends with the given substring (case-sensitive, byte-wise).
 *
 * An empty $haystack or an empty $needle always yields false.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return bool
 */
public static function str_ends_with(string $haystack, string $needle): bool
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // Compare the last strlen($needle) bytes of the haystack with the needle.
    $needleLength = \strlen($needle);
    $tail = \substr($haystack, -$needleLength);

    return $tail === $needle;
}
5443
|
|
|
|
5444
|
|
|
/**
 * Returns true if the string ends with any of $substrings, false otherwise.
 *
 * - case-sensitive
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $substrings <p>Substrings to look for.</p>
 *
 * @return bool whether or not $str ends with one of the $substrings
 */
public static function str_ends_with_any(string $str, array $substrings): bool
{
    if (empty($substrings)) {
        return false;
    }

    // The first matching suffix decides.
    foreach ($substrings as $candidate) {
        $matches = self::str_ends_with($str, $candidate);
        if ($matches) {
            return true;
        }
    }

    return false;
}
5468
|
|
|
|
5469
|
|
|
/**
 * Ensures that the string begins with $substring. If it doesn't, it's
 * prepended.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The substring to add if not present.</p>
 *
 * @return string
 */
public static function str_ensure_left(string $str, string $substring): string
{
    if (self::str_starts_with($str, $substring)) {
        return $str;
    }

    return $substring . $str;
}
5486
|
|
|
|
5487
|
|
|
/**
 * Ensures that the string ends with $substring. If it doesn't, it's appended.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The substring to add if not present.</p>
 *
 * @return string
 */
public static function str_ensure_right(string $str, string $substring): string
{
    if (self::str_ends_with($str, $substring)) {
        return $str;
    }

    return $str . $substring;
}
5503
|
|
|
|
5504
|
|
|
/**
 * Strips '_id', replaces underscores with spaces and passes the trimmed
 * result through self::ucfirst().
 *
 * @param string $str
 *
 * @return string
 */
public static function str_humanize($str): string
{
    // Order matters: "_id" has to be removed before the remaining "_" become spaces.
    $search = ['_id', '_'];
    $replacements = ['', ' '];

    $str = self::str_replace($search, $replacements, $str);
    $str = self::trim($str);

    return self::ucfirst($str);
}
5528
|
|
|
|
5529
|
|
|
/**
 * Check if the string ends with the given substring, case insensitive.
 *
 * An empty $haystack or an empty $needle always yields false.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return bool
 */
public static function str_iends_with(string $haystack, string $needle): bool
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // The tail is cut byte-wise with the needle's byte length.
    // NOTE(review): case variants with a different byte length than $needle
    // would presumably not line up here - confirm against self::strcasecmp().
    $tail = \substr($haystack, -\strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
}
5549
|
|
|
|
5550
|
|
|
/**
 * Returns true if the string ends with any of $substrings, false otherwise.
 *
 * - case-insensitive
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $substrings <p>Substrings to look for.</p>
 *
 * @return bool whether or not $str ends with one of the $substrings
 */
public static function str_iends_with_any(string $str, array $substrings): bool
{
    if (empty($substrings)) {
        return false;
    }

    // The first matching suffix decides.
    foreach ($substrings as $candidate) {
        $matches = self::str_iends_with($str, $candidate);
        if ($matches) {
            return true;
        }
    }

    return false;
}
5574
|
|
|
|
5575
|
|
|
/**
 * Returns the index of the first occurrence of $needle in the string,
 * and false if not found. Accepts an optional offset from which to begin
 * the search.
 *
 * Thin wrapper around self::stripos().
 *
 * @param string $str      <p>The input string.</p>
 * @param string $needle   <p>Substring to look for.</p>
 * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return false|int
 *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
 */
public static function str_iindex_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
{
    $index = self::stripos($str, $needle, $offset, $encoding);

    return $index;
}
5597
|
|
|
|
5598
|
|
|
/**
 * Returns the index of the last occurrence of $needle in the string,
 * and false if not found. Accepts an optional offset from which to begin
 * the search. Offsets may be negative to count from the last character
 * in the string.
 *
 * Thin wrapper around self::strripos().
 *
 * @param string $str      <p>The input string.</p>
 * @param string $needle   <p>Substring to look for.</p>
 * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return false|int
 *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
 */
public static function str_iindex_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
{
    $index = self::strripos($str, $needle, $offset, $encoding);

    return $index;
}
5621
|
|
|
|
5622
|
|
|
/**
 * Returns the index of the first occurrence of $needle in the string,
 * and false if not found. Accepts an optional offset from which to begin
 * the search.
 *
 * Thin wrapper around self::strpos().
 *
 * @param string $str      <p>The input string.</p>
 * @param string $needle   <p>Substring to look for.</p>
 * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return false|int
 *                   The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
 */
public static function str_index_first(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
{
    $index = self::strpos($str, $needle, $offset, $encoding);

    return $index;
}
5644
|
|
|
|
5645
|
|
|
/**
 * Returns the index of the last occurrence of $needle in the string,
 * and false if not found. Accepts an optional offset from which to begin
 * the search. Offsets may be negative to count from the last character
 * in the string.
 *
 * Thin wrapper around self::strrpos().
 *
 * @param string $str      <p>The input string.</p>
 * @param string $needle   <p>Substring to look for.</p>
 * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return false|int
 *                   The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>
 */
public static function str_index_last(string $str, string $needle, int $offset = 0, string $encoding = 'UTF-8')
{
    $index = self::strrpos($str, $needle, $offset, $encoding);

    return $index;
}
5668
|
|
|
|
5669
|
|
|
/**
 * Inserts $substring into the string at the $index provided.
 *
 * If $index is greater than the length of the string, the string is
 * returned unchanged.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>String to be inserted.</p>
 * @param int    $index     <p>The index at which to insert the substring.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 */
public static function str_insert(string $str, string $substring, int $index, string $encoding = 'UTF-8'): string
{
    $length = self::strlen($str, $encoding);

    if ($index > $length) {
        return $str;
    }

    // Split at $index and glue the new part in between.
    return self::substr($str, 0, $index, $encoding)
        . $substring
        . self::substr($str, $index, $length, $encoding);
}
5692
|
|
|
|
5693
|
|
|
/**
 * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
 *
 * Each search term is turned into a quoted, case-insensitive UTF-8 regex.
 * Replacements are inserted literally: "$" and "\" are escaped, so
 * sequences like "$1" or "\1" are not read as back-references.
 *
 * @see http://php.net/manual/en/function.str-ireplace.php
 *
 * @param mixed $search  <p>
 *                       Every replacement with search array is
 *                       performed on the result of previous replacement.
 *                       An empty search term never matches.
 *                       </p>
 * @param mixed $replace <p>
 *                       The replacement string, or an array of replacements.
 *                       </p>
 * @param mixed $subject <p>
 *                       If subject is an array, then the search and
 *                       replace is performed with every entry of
 *                       subject, and the return value is an array as
 *                       well.
 *                       </p>
 * @param int   $count   [optional] <p>
 *                       The number of matched and replaced needles will
 *                       be returned in count which is passed by
 *                       reference.
 *                       </p>
 *
 * @return mixed a string or an array of replacements
 */
public static function str_ireplace($search, $replace, $subject, &$count = null)
{
    // Build one regex per search term (no by-reference loop, so no dangling reference).
    $patterns = [];
    foreach ((array) $search as $key => $term) {
        $term = (string) $term;
        $patterns[$key] = $term === ''
            ? '/^(?<=.)$/' // can never match: a look-behind at the very start always fails
            : '/' . \preg_quote($term, '/') . '/ui';
    }

    // preg_replace() interprets "$n" / "\n" in the replacement; escape them
    // so the replacement text is used verbatim like in str_ireplace().
    $escape = static function ($value): string {
        return \addcslashes((string) $value, '\\$');
    };
    $replace = \is_array($replace) ? \array_map($escape, $replace) : $escape($replace);

    return \preg_replace($patterns, $replace, $subject, -1, $count);
}
5737
|
|
|
|
5738
|
|
|
/**
 * Replaces $search from the beginning of string with $replacement
 * (byte-wise, case-insensitive comparison).
 *
 * Special cases: an empty $search appends $replacement to $str; if $str
 * does not start with $search it is returned unchanged.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @return string string after the replacements
 */
public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
{
    if ($str === '') {
        if ($replacement === '') {
            return '';
        }

        if ($search === '') {
            return $replacement;
        }
    }

    if ($search === '') {
        return $str . $replacement;
    }

    // Compare only the prefix instead of scanning the whole string with stripos().
    $searchLength = \strlen($search);
    if (\strncasecmp($str, $search, $searchLength) === 0) {
        return $replacement . \substr($str, $searchLength);
    }

    return $str;
}
5769
|
|
|
|
5770
|
|
|
/**
 * Replaces $search from the ending of string with $replacement
 * (byte-wise, case-insensitive comparison).
 *
 * Special cases: an empty $search appends $replacement to $str; if $str
 * does not end with $search (or is shorter than $search) it is returned
 * unchanged.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @return string string after the replacements
 */
public static function str_ireplace_ending(string $str, string $search, string $replacement): string
{
    if ($str === '') {
        if ($replacement === '') {
            return '';
        }

        if ($search === '') {
            return $replacement;
        }
    }

    if ($search === '') {
        return $str . $replacement;
    }

    // A search string longer than the subject can never be its suffix.
    // (Passing the resulting negative offset to stripos() would raise a
    // ValueError on PHP 8 and a warning on PHP 7.)
    $searchLength = \strlen($search);
    if ($searchLength > \strlen($str)) {
        return $str;
    }

    // Compare only the tail that has the same byte length as $search.
    if (\strcasecmp(\substr($str, -$searchLength), $search) === 0) {
        return \substr($str, 0, -$searchLength) . $replacement;
    }

    return $str;
}
5801
|
|
|
|
5802
|
|
|
/**
 * Check if the string starts with the given substring, case insensitive.
 *
 * An empty $haystack or an empty $needle always yields false.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return bool
 */
public static function str_istarts_with(string $haystack, string $needle): bool
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    // A match at position 0 means the haystack starts with the needle.
    return self::stripos($haystack, $needle) === 0;
}
5822
|
|
|
|
5823
|
|
|
/**
 * Returns true if the string begins with any of $substrings, false otherwise.
 *
 * - case-insensitive
 *
 * @param string $str        <p>The input string.</p>
 * @param array  $substrings <p>Substrings to look for.</p>
 *
 * @return bool whether or not $str starts with one of the $substrings
 */
public static function str_istarts_with_any(string $str, array $substrings): bool
{
    if ($str === '' || empty($substrings)) {
        return false;
    }

    // The first matching prefix decides.
    foreach ($substrings as $candidate) {
        $matches = self::str_istarts_with($str, $candidate);
        if ($matches) {
            return true;
        }
    }

    return false;
}
5851
|
|
|
|
5852
|
|
|
/**
 * Gets the substring after the first occurrence of a separator
 * (case-insensitive search).
 *
 * Returns an empty string if $str or $separator is empty, or if the
 * separator does not occur in $str.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string
 */
public static function str_isubstr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($separator === '' || $str === '') {
        return '';
    }

    // Use the same encoding for the lookup as for strlen()/substr() below,
    // otherwise the offset and the cut position could disagree.
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    return (string) self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
5883
|
|
|
|
5884
|
|
|
/**
 * Gets the substring after the last occurrence of a separator
 * (case-insensitive search).
 *
 * Returns an empty string if $str or $separator is empty, or if the
 * separator does not occur in $str.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string
 */
public static function str_isubstr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($separator === '' || $str === '') {
        return '';
    }

    // Use the same encoding for the lookup as for strlen()/substr() below,
    // otherwise the offset and the cut position could disagree.
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    return (string) self::substr(
        $str,
        $offset + self::strlen($separator, $encoding),
        null,
        $encoding
    );
}
5915
|
|
|
|
5916
|
|
|
/**
 * Gets the substring before the first occurrence of a separator
 * (case-insensitive search).
 *
 * Returns an empty string if $str or $separator is empty, or if the
 * separator does not occur in $str.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string
 */
public static function str_isubstr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($separator === '' || $str === '') {
        return '';
    }

    // Use the same encoding for the lookup as for substr() below,
    // otherwise the offset and the cut position could disagree.
    $offset = self::str_iindex_first($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    return (string) self::substr($str, 0, $offset, $encoding);
}
5942
|
|
|
|
5943
|
|
|
/**
 * Gets the substring before the last occurrence of a separator
 * (case-insensitive search).
 *
 * Returns an empty string if $str or $separator is empty, or if the
 * separator does not occur in $str.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string
 */
public static function str_isubstr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($separator === '' || $str === '') {
        return '';
    }

    // Use the same encoding for the lookup as for substr() below,
    // otherwise the offset and the cut position could disagree.
    $offset = self::str_iindex_last($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    return (string) self::substr($str, 0, $offset, $encoding);
}
5969
|
|
|
|
5970
|
|
|
/**
 * Gets the substring after (or before via "$beforeNeedle") the first occurrence of the "$needle".
 *
 * Delegates to self::stristr() and maps its "not found" result (false)
 * to an empty string. Empty input also yields an empty string.
 *
 * @param string $str          <p>The input string.</p>
 * @param string $needle       <p>The string to look for.</p>
 * @param bool   $beforeNeedle [optional] <p>Default: false</p>
 * @param string $encoding     [optional] <p>Default: UTF-8</p>
 *
 * @return string
 */
public static function str_isubstr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $needle === '') {
        return '';
    }

    $part = self::stristr($str, $needle, $beforeNeedle, $encoding);

    return $part === false ? '' : $part;
}
6002
|
|
|
|
6003
|
|
|
/**
 * Gets the substring after (or before via "$beforeNeedle") the last occurrence of the "$needle".
 *
 * Delegates to self::strrichr() and maps its "not found" result (false)
 * to an empty string. Empty input also yields an empty string.
 *
 * @param string $str          <p>The input string.</p>
 * @param string $needle       <p>The string to look for.</p>
 * @param bool   $beforeNeedle [optional] <p>Default: false</p>
 * @param string $encoding     [optional] <p>Default: UTF-8</p>
 *
 * @return string
 */
public static function str_isubstr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $needle === '') {
        return '';
    }

    $part = self::strrichr($str, $needle, $beforeNeedle, $encoding);

    return $part === false ? '' : $part;
}
6030
|
|
|
|
6031
|
|
|
/**
 * Returns the last $n characters of the string.
 *
 * A non-positive $n yields an empty string.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $n        <p>Number of characters to retrieve from the end.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 */
public static function str_last_char(string $str, int $n = 1, string $encoding = 'UTF-8'): string
{
    if ($n < 1) {
        return '';
    }

    // A negative start offset counts from the end of the string.
    $tail = self::substr($str, -$n, null, $encoding);
    if ($tail === false) {
        return '';
    }

    return $tail;
}
6050
|
|
|
|
6051
|
|
|
/**
 * Limit the number of characters in a string.
 *
 * Strings that already fit into $length are returned unchanged. Longer
 * strings are cut so that the kept part plus $strAddOn is $length
 * characters long. If $strAddOn alone is as long as or longer than
 * $length, only $strAddOn is returned.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   [optional] <p>Default: 100</p>
 * @param string $strAddOn [optional] <p>Default: …</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return string
 */
public static function str_limit(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
{
    if ($str === '' || $length <= 0) {
        return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
        return $str;
    }

    // Measure the add-on in the same encoding as the string itself.
    $keep = $length - (int) self::strlen($strAddOn, $encoding);

    // Never pass a negative length to substr(): it would cut from the end
    // of the string and could return more characters than requested.
    if ($keep <= 0) {
        return $strAddOn;
    }

    return self::substr($str, 0, $keep, $encoding) . $strAddOn;
}
6077
|
|
|
|
6078
|
|
|
/**
 * Limit the number of characters in a string without cutting the last word in half.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   [optional] <p>Default: 100</p>
 * @param string $strAddOn [optional] <p>Default: …</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return string
 */
public static function str_limit_after_word(string $str, int $length = 100, string $strAddOn = '…', string $encoding = 'UTF-8'): string
{
    if ($str === '' || $length <= 0) {
        return '';
    }

    if (self::strlen($str, $encoding) <= $length) {
        return $str;
    }

    // The limit falls exactly on a space: cut right before it.
    if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
        return self::substr($str, 0, $length - 1, $encoding) . $strAddOn;
    }

    $truncated = (string) self::substr($str, 0, $length, $encoding);

    // Drop the (possibly incomplete) last word: keep everything before the last space.
    $lastSpace = \strrpos($truncated, ' ');
    $withoutLastWord = $lastSpace === false ? '' : \substr($truncated, 0, $lastSpace);

    if ($withoutLastWord === '') {
        // No word boundary available: fall back to a hard cut.
        return self::substr($truncated, 0, $length - 1, $encoding) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
}
6119
|
|
|
|
6120
|
|
|
/**
 * Find the longest prefix shared by $str and $otherStr.
 *
 * @param string $str      <p>The input string.</p>
 * @param string $otherStr <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 */
public static function str_longest_common_prefix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
{
    // Only positions that exist in both strings can match.
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $prefix = '';
    $position = 0;
    while ($position < $limit) {
        $char = self::substr($str, $position, 1, $encoding);

        if ($char !== self::substr($otherStr, $position, 1, $encoding)) {
            break;
        }

        $prefix .= $char;
        ++$position;
    }

    return $prefix;
}
6146
|
|
|
|
6147
|
|
|
/**
 * Returns the longest common substring between the string and $otherStr.
 * In the case of ties, it returns that which occurs first.
 *
 * @param string $str      <p>The input string.</p>
 * @param string $otherStr <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string <p>The longest common substring, taken from $str.</p>
 */
public static function str_longest_common_substring(string $str, string $otherStr, string $encoding = 'UTF-8'): string
{
    // Uses dynamic programming to solve
    // http://en.wikipedia.org/wiki/Longest_common_substring_problem
    $strLength = self::strlen($str, $encoding);
    $otherLength = self::strlen($otherStr, $encoding);

    // Return if either string is empty
    if ($strLength === 0 || $otherLength === 0) {
        return '';
    }

    // Extract the characters of $otherStr once instead of once per cell.
    $otherChars = [];
    for ($j = 0; $j < $otherLength; ++$j) {
        $otherChars[$j] = self::substr($otherStr, $j, 1, $encoding);
    }

    $len = 0; // length of the best match found so far
    $end = 0; // (exclusive) end position of that match within $str
    $table = \array_fill(
        0,
        $strLength + 1,
        \array_fill(0, $otherLength + 1, 0)
    );

    for ($i = 1; $i <= $strLength; ++$i) {
        // The character of $str is invariant for the whole inner loop.
        $strChar = self::substr($str, $i - 1, 1, $encoding);

        for ($j = 1; $j <= $otherLength; ++$j) {
            if ($strChar === $otherChars[$j - 1]) {
                // Extend the common run that ended at the previous characters.
                $table[$i][$j] = $table[$i - 1][$j - 1] + 1;

                // Strictly greater: on ties the first occurrence wins.
                if ($table[$i][$j] > $len) {
                    $len = $table[$i][$j];
                    $end = $i;
                }
            } else {
                $table[$i][$j] = 0;
            }
        }
    }

    $returnTmp = self::substr($str, $end - $len, $len, $encoding);

    return $returnTmp === false ? '' : $returnTmp;
}
6198
|
|
|
|
6199
|
|
|
/**
 * Find the longest suffix shared by $str and $otherStr.
 *
 * @param string $str      <p>The input string.</p>
 * @param string $otherStr <p>Second string for comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string
 */
public static function str_longest_common_suffix(string $str, string $otherStr, string $encoding = 'UTF-8'): string
{
    $limit = \min(self::strlen($str, $encoding), self::strlen($otherStr, $encoding));

    $suffix = '';
    $fromEnd = 1;
    // Walk backwards from the last character while both strings agree.
    while ($fromEnd <= $limit) {
        $char = self::substr($str, -$fromEnd, 1, $encoding);

        if ($char !== self::substr($otherStr, -$fromEnd, 1, $encoding)) {
            break;
        }

        $suffix = $char . $suffix;
        ++$fromEnd;
    }

    return $suffix;
}
6225
|
|
|
|
6226
|
|
|
/**
 * Check whether $str matches the given regular expression.
 *
 * The pattern is wrapped in "/" delimiters and evaluated with the "u" modifier.
 *
 * @param string $str     <p>The input string.</p>
 * @param string $pattern <p>Regex pattern (without delimiters) to match against.</p>
 *
 * @return bool <p>True if the pattern matches, false otherwise (including on regex errors).</p>
 */
public static function str_matches_pattern(string $str, string $pattern): bool
{
    // preg_match() yields 1 on a match, 0 on no match and false on failure.
    return (bool) \preg_match('/' . $pattern . '/u', $str);
}
6242
|
|
|
|
6243
|
|
|
/**
 * Tell whether a character exists at the given offset. Negative offsets
 * count from the last character of the string. Implements part of the
 * ArrayAccess interface.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $offset   <p>The index to check.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return bool <p>Whether or not the index exists.</p>
 */
public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
{
    $length = self::strlen($str, $encoding);

    // Non-negative offsets are zero-based; negative ones may reach back
    // at most "length" characters.
    return $offset >= 0
        ? $length > $offset
        : $length >= \abs($offset);
}
6265
|
|
|
|
6266
|
|
|
/**
 * Returns the character at the given index. Offsets may be negative to
 * count from the last character in the string. Implements part of the
 * ArrayAccess interface, and throws an OutOfBoundsException if the index
 * does not exist.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @throws \OutOfBoundsException if the positive or negative offset does not exist
 *
 * @return string <p>The character at the specified index.</p>
 */
public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
{
    // Measure the string in the requested encoding so the bounds check
    // agrees with the lookup below.
    $length = self::strlen($str, $encoding);

    if (
        ($index >= 0 && $length <= $index)
        ||
        $length < \abs($index)
    ) {
        throw new \OutOfBoundsException('No character exists at the index');
    }

    return self::char_at($str, $index, $encoding);
}
6295
|
|
|
|
6296
|
|
|
/**
 * Pad a UTF-8 string to given length with another string.
 *
 * @param string     $str        <p>The input string.</p>
 * @param int        $pad_length <p>The length of return string.</p>
 * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
 * @param int|string $pad_type   [optional] <p>
 *                               Can be <strong>STR_PAD_RIGHT</strong> (default),
 *                               <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>,
 *                               or one of the strings 'left', 'right' or 'both'.
 *                               </p>
 * @param string     $encoding   [optional] <p>Default: UTF-8</p>
 *
 * @throws \InvalidArgumentException if $pad_type is a non-integer other than 'left', 'right' or 'both'
 *
 * @return string <p>Returns the padded string.</p>
 */
public static function str_pad(string $str, int $pad_length, string $pad_string = ' ', $pad_type = \STR_PAD_RIGHT, string $encoding = 'UTF-8'): string
{
    if ($str === '') {
        return '';
    }

    // Translate the string aliases into the native STR_PAD_* constants.
    if (!\is_int($pad_type)) {
        if ($pad_type === 'left') {
            $pad_type = \STR_PAD_LEFT;
        } elseif ($pad_type === 'right') {
            $pad_type = \STR_PAD_RIGHT;
        } elseif ($pad_type === 'both') {
            $pad_type = \STR_PAD_BOTH;
        } else {
            throw new \InvalidArgumentException(
                'Pad expects $padType to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
            );
        }
    }

    $str_length = self::strlen($str, $encoding);

    // Nothing to do if the string already has (at least) the target length.
    if ($pad_length <= 0 || $pad_length < $str_length) {
        return $str;
    }

    $ps_length = (int) self::strlen($pad_string, $encoding);
    if ($ps_length === 0) {
        // An empty pad string cannot add anything; bail out before it
        // is used as a divisor below.
        return $str;
    }

    $diff = ($pad_length - $str_length);

    switch ($pad_type) {
        case \STR_PAD_LEFT:
            $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
            $pre = (string) self::substr($pre, 0, $diff, $encoding);
            $post = '';

            break;

        case \STR_PAD_BOTH:
            // The left side gets the smaller half when $diff is odd.
            $pre = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
            $pre = (string) self::substr($pre, 0, (int) \floor($diff / 2), $encoding);
            $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length / 2));
            $post = (string) self::substr($post, 0, (int) \ceil($diff / 2), $encoding);

            break;

        case \STR_PAD_RIGHT:
        default:
            $post = \str_repeat($pad_string, (int) \ceil($diff / $ps_length));
            $post = (string) self::substr($post, 0, $diff, $encoding);
            $pre = '';
    }

    return $pre . $str . $post;
}
6369
|
|
|
|
6370
|
|
|
/**
 * Pad both sides of the string so that it reaches the given length.
 * Alias for pad() with a $padType of 'both'.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return string <p>String with padding applied.</p>
 */
public static function str_pad_both(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
{
    $missing = $length - self::strlen($str, $encoding);

    // With an odd number of missing characters the right side gets one more.
    $left = (int) \floor($missing / 2);
    $right = (int) \ceil($missing / 2);

    return self::apply_padding($str, $left, $right, $padStr, $encoding);
}
6387
|
|
|
|
6388
|
|
|
/**
 * Returns a new string of a given length such that the beginning of the
 * string is padded. Alias for pad() with a $padType of 'left'.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return string <p>String with left padding.</p>
 */
public static function str_pad_left(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
{
    // Measure the string in the requested encoding, like str_pad_both() does.
    $missing = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, $missing, 0, $padStr, $encoding);
}
6403
|
|
|
|
6404
|
|
|
/**
 * Returns a new string of a given length such that the end of the string
 * is padded. Alias for pad() with a $padType of 'right'.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $length   <p>Desired string length after padding.</p>
 * @param string $padStr   [optional] <p>String used to pad, defaults to space. Default: ' '</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return string <p>String with right padding.</p>
 */
public static function str_pad_right(string $str, int $length, string $padStr = ' ', string $encoding = 'UTF-8'): string
{
    // Measure the string in the requested encoding, like str_pad_both() does.
    $missing = $length - self::strlen($str, $encoding);

    return self::apply_padding($str, 0, $missing, $padStr, $encoding);
}
6419
|
|
|
|
6420
|
|
|
/**
 * Repeat a string.
 *
 * The input is passed through self::filter() before it is repeated.
 *
 * @param string $str        <p>The string to be repeated.</p>
 * @param int    $multiplier <p>
 *                           Number of times the input string should be repeated.
 *                           Has to be greater than or equal to 0; with 0 the
 *                           function returns an empty string.
 *                           </p>
 *
 * @return string <p>The repeated string.</p>
 */
public static function str_repeat(string $str, int $multiplier): string
{
    return \str_repeat(self::filter($str), $multiplier);
}
6444
|
|
|
|
6445
|
|
|
/**
 * Replace all occurrences of the search string with the replacement string.
 *
 * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
 *
 * @see http://php.net/manual/en/function.str-replace.php
 *
 * @param mixed $search  <p>
 *                       The value being searched for (the needle).
 *                       An array may be used to designate multiple needles.
 *                       </p>
 * @param mixed $replace <p>
 *                       The replacement value for found search values.
 *                       An array may be used to designate multiple replacements.
 *                       </p>
 * @param mixed $subject <p>
 *                       The string or array being searched and replaced on (the haystack).
 *                       If it is an array, every entry is processed and an array is returned.
 *                       </p>
 * @param int   $count   [optional] <p>If passed, this will hold the number of matched and replaced needles.</p>
 *
 * @return mixed <p>A string or an array with the replaced values.</p>
 */
public static function str_replace($search, $replace, $subject, int &$count = null)
{
    $replaced = \str_replace($search, $replace, $subject, $count);

    return $replaced;
}
6478
|
|
|
|
6479
|
|
|
/**
 * Replace $search with $replacement, but only if $str starts with $search.
 *
 * An empty $search never matches a prefix; in that case $replacement is
 * appended to $str instead.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @return string <p>String after the replacement.</p>
 */
public static function str_replace_beginning(string $str, string $search, string $replacement): string
{
    if ($search === '') {
        return $str . $replacement;
    }

    // Byte-wise prefix comparison; an empty $str can never start with a
    // non-empty $search and is therefore returned unchanged.
    $searchBytes = \strlen($search);
    if (\strncmp($str, $search, $searchBytes) === 0) {
        return $replacement . \substr($str, $searchBytes);
    }

    return $str;
}
6510
|
|
|
|
6511
|
|
|
/**
 * Replace $search with $replacement, but only if $str ends with $search.
 *
 * An empty $search never matches a suffix; in that case $replacement is
 * appended to $str.
 *
 * @param string $str         <p>The input string.</p>
 * @param string $search      <p>The string to search for.</p>
 * @param string $replacement <p>The replacement.</p>
 *
 * @return string <p>String after the replacement.</p>
 */
public static function str_replace_ending(string $str, string $search, string $replacement): string
{
    if ($search === '') {
        return $str . $replacement;
    }

    $searchBytes = \strlen($search);

    // Compare the tail of $str directly instead of calling strpos() with a
    // computed offset: when $search is longer than $str that offset lies
    // outside the string and strpos() raises a warning / ValueError.
    if (\substr($str, -$searchBytes) === $search) {
        return \substr($str, 0, -$searchBytes) . $replacement;
    }

    return $str;
}
6542
|
|
|
|
6543
|
|
|
/**
 * Replace only the first occurrence of "$search" with "$replace".
 *
 * @param string $search  <p>The string to search for.</p>
 * @param string $replace <p>The replacement.</p>
 * @param string $subject <p>The string to search in.</p>
 *
 * @return string <p>The subject with the first match replaced, or unchanged if there is no match.</p>
 */
public static function str_replace_first(string $search, string $replace, string $subject): string
{
    $firstPos = self::strpos($subject, $search);

    if ($firstPos === false) {
        return $subject;
    }

    return self::substr_replace($subject, $replace, $firstPos, self::strlen($search));
}
6561
|
|
|
|
6562
|
|
|
/**
 * Replace only the last occurrence of "$search" with "$replace".
 *
 * @param string $search  <p>The string to search for.</p>
 * @param string $replace <p>The replacement.</p>
 * @param string $subject <p>The string to search in.</p>
 *
 * @return string <p>The subject with the last match replaced, or unchanged if there is no match.</p>
 */
public static function str_replace_last(string $search, string $replace, string $subject): string
{
    $lastPos = self::strrpos($subject, $search);

    if ($lastPos === false) {
        return $subject;
    }

    return self::substr_replace($subject, $replace, $lastPos, self::strlen($search));
}
6580
|
|
|
|
6581
|
|
|
/**
 * Shuffles all the characters in the string.
 *
 * PS: uses random algorithm which is weak for cryptography purposes
 *
 * @param string $str <p>The input string</p>
 *
 * @return string <p>The shuffled string.</p>
 */
public static function str_shuffle(string $str): string
{
    // range(0, -1) would produce the bogus indexes [0, -1] for an empty string.
    if ($str === '') {
        return '';
    }

    $indexes = \range(0, self::strlen($str) - 1);
    /** @noinspection NonSecureShuffleUsageInspection */
    \shuffle($indexes);

    // Rebuild the string by picking the characters in the shuffled order.
    $shuffledStr = '';
    foreach ($indexes as $i) {
        $shuffledStr .= self::substr($str, $i, 1);
    }

    return $shuffledStr;
}
6603
|
|
|
|
6604
|
|
|
/**
 * Returns the substring beginning at $start, and up to, but not including
 * the index specified by $end. If $end is omitted, the function extracts
 * the remaining string. If $end is negative, it is computed from the end
 * of the string.
 *
 * @param string   $str      <p>The input string.</p>
 * @param int      $start    <p>Initial index from which to begin extraction.</p>
 * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
 * @param string   $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return false|string
 *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
 *                      characters long, <b>FALSE</b> will be returned.
 */
public static function str_slice(string $str, int $start, int $end = null, string $encoding = 'UTF-8')
{
    if ($end === null) {
        // No end given: a length covering the whole string takes "the rest".
        $length = self::strlen($str, $encoding);
    } elseif ($end >= 0 && $end <= $start) {
        return '';
    } elseif ($end < 0) {
        // A negative end is relative to the end of the string; measure the
        // string in the same encoding that is used for the extraction.
        $length = self::strlen($str, $encoding) + $end - $start;
    } else {
        $length = $end - $start;
    }

    return self::substr($str, $start, $length, $encoding);
}
6633
|
|
|
|
6634
|
|
|
/**
 * Convert a string to e.g.: "snake_case"
 *
 * @param string $str      <p>The input string.</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return string <p>String in snake_case.</p>
 */
public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
{
    $normalized = \str_replace('-', '_', self::normalize_whitespace($str));

    // Digits get wrapped in underscores; every other match is prefixed
    // with an underscore and lower-cased.
    // NOTE(review): inside the character class "|" is a literal pipe, so pipe
    // characters are matched as well - confirm whether that is intended.
    $separateTokens = function ($matches) use ($encoding) {
        $token = $matches[1];

        if ((string) (int) $token === $token) {
            return '_' . $token . '_';
        }

        return '_' . self::strtolower($token, $encoding);
    };
    $withUnderscores = (string) \preg_replace_callback('/([\d|A-Z])/u', $separateTokens, $normalized);

    $patterns = [
        '/\s+/', // convert spaces to "_"
        '/^\s+|\s+$/', // trim leading & trailing spaces
        '/_+/', // remove double "_"
    ];
    $replacements = [
        '_',
        '',
        '_',
    ];
    $collapsed = (string) \preg_replace($patterns, $replacements, $withUnderscores);

    // trim leading & trailing "_", then leading & trailing whitespace
    return self::trim(self::trim($collapsed, '_'));
}
6680
|
|
|
|
6681
|
|
|
/**
 * Sort all characters of a string by their code points.
 *
 * @param string $str    <p>A UTF-8 string.</p>
 * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
 * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
 *
 * @return string <p>String of sorted characters.</p>
 */
public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
{
    $codePoints = self::codepoints($str);

    if ($unique) {
        // Flipping twice collapses duplicate values.
        $codePoints = \array_flip(\array_flip($codePoints));
    }

    $desc ? \arsort($codePoints) : \asort($codePoints);

    return self::string($codePoints);
}
6706
|
|
|
|
6707
|
|
|
/**
 * Alias for "UTF8::split()".
 *
 * @see UTF8::split()
 *
 * @param string|string[] $str <p>The input, forwarded unchanged.</p>
 * @param int             $len <p>The chunk length, forwarded unchanged.</p>
 *
 * @return string[]
 */
public static function str_split($str, int $len = 1): array
{
    $chunks = self::split($str, $len);

    return $chunks;
}
6721
|
|
|
|
6722
|
|
|
/**
 * Split the string at every occurrence of $pattern and return the parts.
 * An optional integer $limit truncates the result to at most that many parts.
 *
 * The pattern is escaped with preg_quote() before it is used, i.e. it is
 * matched literally.
 *
 * @param string $str     <p>The input string.</p>
 * @param string $pattern <p>The delimiter with which to split the string.</p>
 * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
 *
 * @return string[] <p>An array of strings.</p>
 */
public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
{
    if ($limit === 0) {
        return [];
    }

    // An empty pattern cannot split anything.
    if ($pattern === '') {
        return [$str];
    }

    // preg_split() keeps the unsplit remainder in the last element when a
    // limit is given, so ask for one extra element and drop it afterwards.
    $limit = $limit > 0 ? $limit + 1 : -1;

    $parts = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);
    if ($parts === false) {
        return [];
    }

    if ($limit > 0 && \count($parts) === $limit) {
        \array_pop($parts);
    }

    return $parts;
}
6765
|
|
|
|
6766
|
|
|
/**
 * Check if the string starts with the given substring.
 *
 * An empty haystack or an empty needle never counts as a match.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return bool
 */
public static function str_starts_with(string $haystack, string $needle): bool
{
    return $haystack !== ''
        && $needle !== ''
        && \strncmp($haystack, $needle, \strlen($needle)) === 0;
}
6786
|
|
|
|
6787
|
|
|
/**
 * Check whether the string begins with at least one of the given substrings.
 *
 * - case-sensitive
 *
 * @param string $str        <p>The input string.</p>
 * @param array  $substrings <p>Substrings to look for.</p>
 *
 * @return bool <p>Whether or not $str starts with any of the substrings.</p>
 */
public static function str_starts_with_any(string $str, array $substrings): bool
{
    if ($str === '' || $substrings === []) {
        return false;
    }

    foreach ($substrings as $candidate) {
        if (self::str_starts_with($str, $candidate)) {
            return true;
        }
    }

    return false;
}
6815
|
|
|
|
6816
|
|
|
/**
 * Get everything that follows the first occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string <p>The trailing part, or an empty string if the separator is empty or not found.</p>
 */
public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $separator === '') {
        return '';
    }

    // NOTE(review): $encoding is not forwarded to the position lookup - confirm.
    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
        return '';
    }

    // Start right behind the separator and take the rest of the string.
    $start = $separatorPos + self::strlen($separator, $encoding);

    return (string) self::substr($str, $start, null, $encoding);
}
6847
|
|
|
|
6848
|
|
|
/**
 * Get everything that follows the last occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string <p>The trailing part, or an empty string if the separator is empty or not found.</p>
 */
public static function str_substr_after_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $separator === '') {
        return '';
    }

    // NOTE(review): $encoding is not forwarded to the position lookup - confirm.
    $separatorPos = self::str_index_last($str, $separator);
    if ($separatorPos === false) {
        return '';
    }

    // Start right behind the separator and take the rest of the string.
    $start = $separatorPos + self::strlen($separator, $encoding);

    return (string) self::substr($str, $start, null, $encoding);
}
6879
|
|
|
|
6880
|
|
|
/**
 * Get everything that precedes the first occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string <p>The leading part, or an empty string if the separator is empty or not found.</p>
 */
public static function str_substr_before_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($str === '' || $separator === '') {
        return '';
    }

    // NOTE(review): $encoding is not forwarded to the position lookup - confirm.
    $separatorPos = self::str_index_first($str, $separator);
    if ($separatorPos === false) {
        return '';
    }

    // The separator position doubles as the length of the leading part.
    return (string) self::substr($str, 0, $separatorPos, $encoding);
}
6911
|
|
|
|
6912
|
|
|
/**
 * Gets the substring before the last occurrence of a separator.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $separator <p>The string separator.</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string
 *                <p>Everything in front of the last separator, or an empty string if $str or
 *                $separator is empty or the separator does not occur in $str.</p>
 */
public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
{
    if ($separator === '' || $str === '') {
        return '';
    }

    // The offset is used as a length for substr() with "$encoding",
    // so it has to be searched with the very same encoding.
    // NOTE(review): assumes str_index_last() is a plain wrapper around strrpos() - confirm.
    $offset = self::strrpos($str, $separator, 0, $encoding);
    if ($offset === false) {
        return '';
    }

    return (string) self::substr(
        $str,
        0,
        $offset,
        $encoding
    );
}
6943
|
|
|
|
6944
|
|
|
/**
 * Returns the part of "$str" starting at (or, with "$beforeNeedle", ending before)
 * the first occurrence of "$needle".
 *
 * @param string $str          <p>The input string.</p>
 * @param string $needle       <p>The string to look for.</p>
 * @param bool   $beforeNeedle [optional] <p>Default: false</p>
 * @param string $encoding     [optional] <p>Default: UTF-8</p>
 *
 * @return string
 *                <p>The result of UTF8::strstr(), or an empty string if one of the
 *                inputs is empty or UTF8::strstr() returns false.</p>
 */
public static function str_substr_first(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
{
    if ($needle === '' || $str === '') {
        return '';
    }

    $result = self::strstr($str, $needle, $beforeNeedle, $encoding);

    return $result === false ? '' : $result;
}
6976
|
|
|
|
6977
|
|
|
/**
 * Returns the part of "$str" starting at (or, with "$beforeNeedle", ending before)
 * the last occurrence of "$needle".
 *
 * @param string $str          <p>The input string.</p>
 * @param string $needle       <p>The string to look for.</p>
 * @param bool   $beforeNeedle [optional] <p>Default: false</p>
 * @param string $encoding     [optional] <p>Default: UTF-8</p>
 *
 * @return string
 *                <p>The result of UTF8::strrchr(), or an empty string if one of the
 *                inputs is empty or UTF8::strrchr() returns false.</p>
 */
public static function str_substr_last(string $str, string $needle, bool $beforeNeedle = false, string $encoding = 'UTF-8'): string
{
    if ($needle === '' || $str === '') {
        return '';
    }

    $result = self::strrchr($str, $needle, $beforeNeedle, $encoding);

    return $result === false ? '' : $result;
}
7004
|
|
|
|
7005
|
|
|
/**
 * Wraps "$str" in "$substring", i.e. puts the substring in front of and behind the string.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $substring <p>The substring to add to both sides.</p>
 *
 * @return string string with the substring both prepended and appended
 */
public static function str_surround(string $str, string $substring): string
{
    return $substring . $str . $substring;
}
7017
|
|
|
|
7018
|
|
|
/**
 * Lower-cases every part returned by UTF8::str_to_words() and upper-cases its
 * first character, then glues the parts together again.
 *
 * Parts that are listed in "$ignore" (strict comparison, before any case
 * conversion) are left untouched.
 *
 * @param string              $str
 * @param array|string[]|null $ignore                [optional] <p>An array of words not to capitalize or null.
 *                                                   Default: null</p>
 * @param string              $encoding              [optional] <p>Default: UTF-8</p>
 * @param bool                $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null         $lang                  [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool                $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ ->
 *                                                   ß</p>
 * @param bool                $useTrimFirst          [optional] <p>true === trim the input string, first</p>
 *
 * @return string the titleized string
 */
public static function str_titleize(
    string $str,
    array $ignore = null,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false,
    string $lang = null,
    bool $tryToKeepStringLength = false,
    bool $useTrimFirst = true
): string {
    $input = $useTrimFirst === true ? self::trim($str) : $str;

    $parts = self::str_to_words($input);

    foreach ($parts as $index => $part) {
        $isIgnored = $ignore && \in_array($part, $ignore, true);
        if ($isIgnored) {
            continue;
        }

        $lowered = self::strtolower(
            $part,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );

        $parts[$index] = self::str_upper_first(
            $lowered,
            $encoding,
            $cleanUtf8,
            $lang,
            $tryToKeepStringLength
        );
    }

    return \implode('', $parts);
}
7073
|
|
|
|
7074
|
|
|
/**
 * Returns a trimmed string in proper title case.
 *
 * Also accepts an array, $ignore, allowing you to list words not to be
 * capitalized.
 *
 * Adapted from John Gruber's script.
 *
 * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
 *
 * @param string $str
 * @param array  $ignore   <p>An array of words not to capitalize. The entries are inserted into the
 *                         regular expressions as they are (not quoted), like the built-in small words.</p>
 * @param string $encoding [optional] <p>Default: UTF-8</p>
 *
 * @return string the titleized string
 */
public static function str_titleize_for_humans(string $str, array $ignore = [], string $encoding = 'UTF-8'): string
{
    // built-in "small words" (regex fragments) plus the caller supplied ones
    $smallWords = \array_merge(
        [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ],
        $ignore
    );

    $smallWordsRx = \implode('|', $smallWords);
    $apostropheRx = '(?x: [\'’] [[:lower:]]* )?';

    $str = self::trim($str);

    // a string without any lowercase char (e.g. all caps) is lower-cased first,
    // with the same encoding that is used for every other conversion below
    if (self::has_lowercase($str) === false) {
        $str = self::strtolower($str, $encoding);
    }

    // The main substitutions
    $str = (string) \preg_replace_callback(
        '~\b (_*) (?:                                                         # 1. Leading underscore and
                    ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |              # 2. file path or
                      [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostropheRx . ' ) #    URL, domain, or email
                    |
                    ( (?i: ' . $smallWordsRx . ' ) ' . $apostropheRx . ' )            # 3. or small word (case-insensitive)
                    |
                    ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 4. or word w/o internal caps
                    |
                    ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostropheRx . ' )     # 5. or some other word
                  ) (_*) \b                                                           # 6. With trailing underscore
                ~ux',
        static function ($matches) use ($encoding) {
            // Preserve leading underscore
            $str = $matches[1];
            if ($matches[2]) {
                // Preserve URLs, domains, emails and file paths
                $str .= $matches[2];
            } elseif ($matches[3]) {
                // Lower-case small words
                $str .= self::strtolower($matches[3], $encoding);
            } elseif ($matches[4]) {
                // Capitalize word w/o internal caps
                $str .= self::str_upper_first($matches[4], $encoding);
            } else {
                // Preserve other kinds of word (iPhone)
                $str .= $matches[5];
            }
            // Preserve trailing underscore
            $str .= $matches[6];

            return $str;
        },
        $str
    );

    // Exceptions for small words: capitalize at start of title...
    $str = (string) \preg_replace_callback(
        '~(  \A [[:punct:]]*            # start of title...
                  |  [:.;?!][ ]+                # or of subsentence...
                  |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                  ( ' . $smallWordsRx . ' ) \b  # ...followed by small word
                ~uxi',
        static function ($matches) use ($encoding) {
            return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    // ...and end of title
    $str = (string) \preg_replace_callback(
        '~\b ( ' . $smallWordsRx . ' )      # small word...
                  (?= [[:punct:]]* \Z          # ...at the end of the title...
                  |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                ~uxi',
        static function ($matches) use ($encoding) {
            return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // Exceptions for small words in hyphenated compound words
    // e.g. "in-flight" -> In-Flight
    $str = (string) \preg_replace_callback(
        '~\b
                    (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                    ( ' . $smallWordsRx . ' )
                    (?= -[[:alpha:]]+)        # lookahead for "-someword"
                ~uxi',
        static function ($matches) use ($encoding) {
            return self::str_upper_first($matches[1], $encoding);
        },
        $str
    );

    // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
    $str = (string) \preg_replace_callback(
        '~\b
                    (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                    ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                    ( ' . $smallWordsRx . ' ) # ...followed by small word
                    (?!	- )                 # Negative lookahead for another -
                ~uxi',
        static function ($matches) use ($encoding) {
            return $matches[1] . self::str_upper_first($matches[2], $encoding);
        },
        $str
    );

    return $str;
}
7218
|
|
|
|
7219
|
|
|
/**
 * Get a binary representation of a specific string.
 *
 * Every byte of the input is written as 8 bits; leading zero bits of the
 * whole result are removed, so short inputs give the same output that
 * "base_convert(<hex>, 16, 2)" gives.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string
 *                <p>The bits as a string of "0" and "1", or "0" if the input is empty or consists of
 *                NUL bytes only.</p>
 */
public static function str_to_binary(string $str): string
{
    // The bits are built byte by byte instead of via "base_convert()":
    // that function works with floats internally and silently loses
    // precision for longer inputs.
    $bits = '';
    $byteCount = \strlen($str);
    for ($i = 0; $i < $byteCount; ++$i) {
        $bits .= \sprintf('%08b', \ord($str[$i]));
    }

    // same shape as a converted number: no leading zeros, but at least one digit
    $bits = \ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
}
7232
|
|
|
|
7233
|
|
|
/**
 * Splits a string into its lines.
 *
 * One or two consecutive line-break chars (CR / LF) count as a single line
 * break, so e.g. "\r\n" - but also "\n\n" - produces only one split.
 *
 * @param string   $str
 * @param bool     $removeEmptyValues <p>Remove empty values.</p>
 * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
 *
 * @return string[]
 */
public static function str_to_lines(string $str, bool $removeEmptyValues = false, int $removeShortValues = null): array
{
    // result for "nothing to split" (empty input or a failing regex)
    $fallback = $removeEmptyValues === true ? [] : [''];

    if ($str === '') {
        return $fallback;
    }

    $lines = \preg_split("/[\r\n]{1,2}/u", $str);
    if ($lines === false) {
        return $fallback;
    }

    $needsFiltering = $removeEmptyValues !== false || $removeShortValues !== null;
    if (!$needsFiltering) {
        return $lines;
    }

    return self::reduce_string_array($lines, $removeEmptyValues, $removeShortValues);
}
7266
|
|
|
|
7267
|
|
|
/**
 * Splits a string into words and the text between them.
 *
 * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result contains
 * the words as well as the parts in between (and may start / end with an
 * empty string).
 *
 * @param string   $str
 * @param string   $charList          <p>Additional chars for the definition of "words".</p>
 * @param bool     $removeEmptyValues <p>Remove empty values.</p>
 * @param int|null $removeShortValues <p>The min. string length or null to disable</p>
 *
 * @return string[]
 */
public static function str_to_words(string $str, string $charList = '', bool $removeEmptyValues = false, int $removeShortValues = null): array
{
    // result for "nothing to split" (empty input or a failing regex)
    $fallback = $removeEmptyValues === true ? [] : [''];

    if ($str === '') {
        return $fallback;
    }

    // character class that describes the chars a word is made of
    $wordChars = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
        return $fallback;
    }

    if ($removeShortValues === null && $removeEmptyValues === false) {
        return $parts;
    }

    $reduced = self::reduce_string_array($parts, $removeEmptyValues, $removeShortValues);

    // make sure that every remaining entry is a string
    return \array_map(
        static function ($value): string {
            return (string) $value;
        },
        $reduced
    );
}
7311
|
|
|
|
7312
|
|
|
/**
 * Alias of "UTF8::to_ascii()": all three arguments are passed through unchanged.
 *
 * @see UTF8::to_ascii()
 *
 * @param string $str
 * @param string $unknown
 * @param bool   $strict
 *
 * @return string
 */
public static function str_transliterate(string $str, string $unknown = '?', bool $strict = false): string
{
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
}
7327
|
|
|
|
7328
|
|
|
/**
 * Cuts the string down to "$length" characters.
 *
 * If the string has to be cut, the length of "$substring" is subtracted from
 * "$length" first and "$substring" is appended to the shortened string.
 *
 * @param string $str
 * @param int    $length    <p>Desired length of the truncated string.</p>
 * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string string after truncating
 */
public static function str_truncate($str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
{
    $input = (string) $str;

    if ($input === '') {
        return '';
    }

    // short enough already: nothing is cut, nothing is appended
    if (self::strlen($input, $encoding) <= $length) {
        return $input;
    }

    // reserve the room that the appended substring needs
    $keep = $length - self::strlen($substring, $encoding);

    return self::substr($input, 0, $keep, $encoding) . $substring;
}
7361
|
|
|
|
7362
|
|
|
/**
 * Cuts the string down to "$length" characters without splitting a word.
 *
 * If the string has to be cut, the length of "$substring" is subtracted from
 * "$length" first and "$substring" is appended to the shortened string. When
 * the cut would land inside a word, the string is cut at the last space
 * before that position instead.
 *
 * @param string $str
 * @param int    $length    <p>Desired length of the truncated string.</p>
 * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
 * @param string $encoding  [optional] <p>Default: UTF-8</p>
 *
 * @return string string after truncating
 */
public static function str_truncate_safe(string $str, int $length, string $substring = '', string $encoding = 'UTF-8'): string
{
    // short enough already: nothing is cut, nothing is appended
    if ($length >= self::strlen($str, $encoding)) {
        return $str;
    }

    // reserve the room that the appended substring needs
    $limit = $length - self::strlen($substring, $encoding);

    $head = self::substr($str, 0, $limit, $encoding);
    if ($head === false) {
        return '';
    }

    // a space directly at the cut position means that no word was split
    $nextSpace = self::strpos($str, ' ', $limit - 1, $encoding);
    if ($nextSpace === $limit) {
        return $head . $substring;
    }

    // the last word was split: fall back to the last space in front of the cut
    $lastSpace = self::strrpos($head, ' ', 0, $encoding);
    if ($lastSpace !== false || $nextSpace !== false) {
        $head = self::substr($head, 0, (int) $lastSpace, $encoding);
    }

    return $head . $substring;
}
7403
|
|
|
|
7404
|
|
|
/**
 * Returns the string delimited by underscores: a shortcut for
 * "UTF8::str_delimit()" with "_" as delimiter.
 *
 * @param string $str
 *
 * @return string the underscored string
 */
public static function str_underscored(string $str): string
{
    $delimiter = '_';

    return self::str_delimit($str, $delimiter);
}
7418
|
|
|
|
7419
|
|
|
/**
 * Returns an UpperCamelCase version of the supplied string: the string is
 * camelized via "UTF8::str_camelize()" and its first character is
 * upper-cased afterwards.
 *
 * @param string      $str                   <p>The input string.</p>
 * @param string      $encoding              [optional] <p>Default: UTF-8</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string string in UpperCamelCase
 */
public static function str_upper_camelize(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
{
    // only the encoding is handed to the camelize step, all options go to the upper-case step
    $camelized = self::str_camelize($str, $encoding);

    return self::str_upper_first(
        $camelized,
        $encoding,
        $cleanUtf8,
        $lang,
        $tryToKeepStringLength
    );
}
7436
|
|
|
|
7437
|
|
|
/**
 * Alias of "UTF8::ucfirst()": all arguments are passed through unchanged.
 *
 * @see UTF8::ucfirst()
 *
 * @param string      $str
 * @param string      $encoding
 * @param bool        $cleanUtf8
 * @param string|null $lang
 * @param bool        $tryToKeepStringLength
 *
 * @return string
 */
public static function str_upper_first(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
{
    $result = self::ucfirst(
        $str,
        $encoding,
        $cleanUtf8,
        $lang,
        $tryToKeepStringLength
    );

    return $result;
}
7454
|
|
|
|
7455
|
|
|
/**
 * Counts (or lists) the words of a string.
 *
 * The string is split via "UTF8::str_to_words()"; in that result the words
 * sit on the odd indexes and the text between them on the even indexes.
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $format   [optional] <p>
 *                         <strong>0</strong> => return a number of words (default)<br>
 *                         <strong>1</strong> => return an array of words<br>
 *                         <strong>2</strong> => return an array of words with word-offset as key
 *                         </p>
 * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
 *
 * @return int|string[] The number of words in the string
 */
public static function str_word_count(string $str, int $format = 0, string $charlist = '')
{
    $parts = self::str_to_words($str, $charlist);
    $partCount = \count($parts);

    if ($format === 1) {
        $words = [];
        for ($i = 1; $i < $partCount; $i += 2) {
            $words[] = $parts[$i];
        }

        return $words;
    }

    if ($format === 2) {
        $words = [];
        // the first word starts behind the leading non-word part
        $position = self::strlen($parts[0]);
        for ($i = 1; $i < $partCount; $i += 2) {
            $words[$position] = $parts[$i];
            // skip the word itself and the non-word part that follows it
            $position += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
        }

        return $words;
    }

    return (int) (($partCount - 1) / 2);
}
7492
|
|
|
|
7493
|
|
|
/**
 * Case-insensitive string comparison.
 *
 * INFO: Both strings are converted via UTF8::strtocasefold() and compared with UTF8::strcmp() afterwards.
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
 *             <strong>0</strong> if they are equal
 */
public static function strcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
{
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
}
7514
|
|
|
|
7515
|
|
|
/**
 * Alias of "UTF8::strstr()": all arguments are passed through unchanged.
 *
 * @see UTF8::strstr()
 *
 * @param string $haystack
 * @param string $needle
 * @param bool   $before_needle
 * @param string $encoding
 * @param bool   $cleanUtf8
 *
 * @return false|string
 */
public static function strchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
{
    $result = self::strstr(
        $haystack,
        $needle,
        $before_needle,
        $encoding,
        $cleanUtf8
    );

    return $result;
}
7532
|
|
|
|
7533
|
|
|
/**
 * Case-sensitive string comparison.
 *
 * INFO: Both strings are normalized to NFD before they are compared; a
 * string that can not be normalized is compared as it is.
 *
 * @param string $str1 <p>The first string.</p>
 * @param string $str2 <p>The second string.</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
 *             <strong>0</strong> if they are equal
 */
public static function strcmp(string $str1, string $str2): int
{
    // identical strings need no normalization at all
    if ($str1 === $str2) {
        return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // "Normalizer::normalize()" does not return a string on failure and
    // "\strcmp()" rejects non-strings in strict mode (TypeError), so the
    // original string is used as fallback.
    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
}
7552
|
|
|
|
7553
|
|
|
/**
 * Find length of initial segment not matching mask.
 *
 * With "$offset" and / or "$length" only that part of the string is searched.
 *
 * @param string   $str
 * @param string   $charList
 * @param int      $offset
 * @param int|null $length
 *
 * @return int|null
 *                  <p>The length of the leading part of the string that contains none of the chars
 *                  from "$charList"; null if "$charList" or the (sliced) string is empty.</p>
 */
public static function strcspn(string $str, string $charList, int $offset = 0, int $length = null)
{
    if ($charList === '') {
        return null;
    }

    $needsSlice = $offset || $length !== null;
    if ($needsSlice) {
        $slice = self::substr($str, $offset, $length);
        if ($slice === false) {
            return null;
        }

        $str = $slice;
    }

    if ($str === '') {
        return null;
    }

    // lazily capture everything in front of the first char of the mask
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (\preg_match($pattern, $str, $found)) {
        return self::strlen($found[1]);
    }

    // no char of the mask in the string: the whole string is the segment
    return self::strlen($str);
}
7587
|
|
|
|
7588
|
|
|
/**
 * Alias of "UTF8::stristr()": all arguments are passed through unchanged.
 *
 * @see UTF8::stristr()
 *
 * @param string $haystack
 * @param string $needle
 * @param bool   $before_needle
 * @param string $encoding
 * @param bool   $cleanUtf8
 *
 * @return false|string
 */
public static function strichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
{
    $result = self::stristr(
        $haystack,
        $needle,
        $before_needle,
        $encoding,
        $cleanUtf8
    );

    return $result;
}
7605
|
|
|
|
7606
|
|
|
/**
 * Create a UTF-8 string from code points.
 *
 * INFO: opposite to UTF8::codepoints()
 *
 * Every entry of the array is converted via UTF8::chr() and the results are
 * concatenated in the given order.
 *
 * @param array $array <p>Integer or Hexadecimal codepoints.</p>
 *
 * @return string UTF-8 encoded string
 */
public static function string(array $array): string
{
    $chars = \array_map([self::class, 'chr'], $array);

    return \implode('', $chars);
}
7628
|
|
|
|
7629
|
|
|
/**
 * Checks if string starts with "BOM" (Byte Order Mark Character) character.
 *
 * The string is checked against every key of the internal BOM list.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return bool
 *              <strong>true</strong> if the string has BOM at the start,<br>
 *              <strong>false</strong> otherwise
 */
public static function string_has_bom(string $str): bool
{
    // only the BOM strings (the keys) are needed here, not their byte lengths
    foreach (\array_keys(self::$BOM) as $bom) {
        if (\strpos($str, $bom) === 0) {
            return true;
        }
    }

    return false;
}
7648
|
|
|
|
7649
|
|
|
/**
 * Strip HTML and PHP tags from a string + optionally clean invalid UTF-8 first.
 *
 * @see http://php.net/manual/en/function.strip-tags.php
 *
 * @param string      $str            <p>
 *                                    The input string.
 *                                    </p>
 * @param string|null $allowable_tags [optional] <p>
 *                                    You can use the optional second parameter to specify tags which should
 *                                    not be stripped.
 *                                    </p>
 *                                    <p>
 *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
 *                                    can not be changed with allowable_tags.
 *                                    </p>
 * @param bool        $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return string the stripped string
 */
public static function strip_tags(string $str, string $allowable_tags = null, bool $cleanUtf8 = false): string
{
    if ($str === '') {
        return '';
    }

    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return \strip_tags($input, $allowable_tags);
}
7681
|
|
|
|
7682
|
|
|
/**
 * Removes every run of whitespace characters (regex class "[[:space:]]" in
 * UTF-8 mode) from the string, e.g. spaces, tabs and newline characters.
 *
 * @param string $str
 *
 * @return string
 */
public static function strip_whitespace(string $str): string
{
    return $str === '' ? '' : (string) \preg_replace('/[[:space:]]+/u', '', $str);
}
7699
|
|
|
|
7700
|
|
|
/**
 * Case-insensitive lookup of the first occurrence of a string inside another string.
 *
 * The search is attempted with several strategies in turn (mbstring, intl,
 * plain ASCII, vanilla PHP); the first strategy that finds the needle wins.
 *
 * @see http://php.net/manual/en/function.mb-stripos.php
 *
 * @param string $haystack  <p>The string to search in.</p>
 * @param string $needle    <p>The string to search for.</p>
 * @param int    $offset    [optional] <p>The position in haystack at which the search starts.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from both strings first.</p>
 *
 * @return false|int
 *                   The <strong>(int)</strong> position of the first occurrence of needle in the
 *                   haystack,<br> or <strong>false</strong> if one of the strings is empty or the
 *                   needle is not found
 */
public static function stripos(string $haystack, string $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" return a wrong position
        // if invalid characters are found in $haystack before $needle
        $haystack = self::clean($haystack);
        $needle = self::clean($needle);
    }

    $isShortcutEncoding = $encoding === 'UTF-8' || $encoding === 'CP850';
    if (!$isShortcutEncoding) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    //
    // attempt via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        $mbPosition = \mb_stripos($haystack, $needle, $offset, $encoding);
        if ($mbPosition !== false) {
            return $mbPosition;
        }
    }

    //
    // attempt via intl: "grapheme_stripos()" can't handle other encodings
    // than UTF-8 and can't handle a negative offset
    //

    if ($encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true) {
        $graphemePosition = \grapheme_stripos($haystack, $needle, $offset);
        if ($graphemePosition !== false) {
            return $graphemePosition;
        }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
        return \stripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: case-fold both strings, then search case-sensitively
    //

    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($foldedHaystack, $foldedNeedle, $offset, $encoding);
}
7773
|
|
|
|
7774
|
|
|
/**
 * Returns all of haystack starting from and including the first occurrence of needle to the end
 * (case-insensitive search).
 *
 * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
 * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
 * @param bool   $before_needle [optional] <p>
 *                              If <b>TRUE</b>, it returns the part of the
 *                              haystack before the first occurrence of the needle (excluding the needle).
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|string a sub-string,<br>or <strong>false</strong> if needle is not found
 *                      or one of the given strings is empty
 */
public static function stristr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
{
    if ($haystack === '' || $needle === '') {
        return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    // The needle can only be empty at this point if self::clean() removed
    // all of it. The comparison must be strict: a loose "!$needle" check
    // would also treat the perfectly valid needle "0" as empty and hand
    // back the whole haystack instead of searching for it.
    if ($needle === '') {
        return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $returnTmp = \grapheme_stristr($haystack, $needle, $before_needle);
        if ($returnTmp !== false) {
            return $returnTmp;
        }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
        return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php: capture everything in front of the first
    // case-insensitive match of the (quoted) needle
    //

    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
        return false;
    }

    if ($before_needle) {
        return $match[1];
    }

    // Skip as many characters as precede the match.
    return self::substr($haystack, self::strlen($match[1]));
}
7852
|
|
|
|
7853
|
|
|
/**
 * Count the characters of a string (not its bytes).
 *
 * @see http://php.net/manual/en/function.mb-strlen.php
 *
 * @param string $str       <p>The string being measured.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|int
 *                   The number <strong>(int)</strong> of characters in $str for the given
 *                   $encoding (one multi-byte character counts as 1). For the encodings
 *                   "CP850" and "ASCII" the byte length is returned.
 *                   <br>
 *                   <strong>false</strong> is returned if every strategy fails and the final
 *                   regex fallback matches no character although the string is not empty.
 */
public static function strlen(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
{
    if ($str === '') {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return self::strlen_in_byte($str);
    }

    if ($cleanUtf8 === true) {
        // "mb_strlen" and "\iconv_strlen" returns wrong length,
        // if invalid characters are found in $str
        $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
        // Neither of the encoding-aware extensions is available.
        if (self::$SUPPORT['mbstring'] === false && self::$SUPPORT['iconv'] === false) {
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        $mbLength = \mb_strlen($str, $encoding);
        if ($mbLength !== false) {
            return $mbLength;
        }
    }

    //
    // fallback via iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
        $iconvLength = \iconv_strlen($str, $encoding);
        if ($iconvLength !== false) {
            return $iconvLength;
        }
    }

    //
    // fallback via intl ("grapheme_strlen()" can't handle other encodings than UTF-8)
    //

    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
        $graphemeLength = \grapheme_strlen($str);
        if ($graphemeLength !== null) {
            return $graphemeLength;
        }
    }

    //
    // fallback for ascii only
    //

    if (self::is_ascii($str)) {
        return \strlen($str);
    }

    //
    // fallback via vanilla php: count the matches of "any single character"
    //

    \preg_match_all('/./us', $str, $characters);
    $characterCount = \count($characters[0]);

    // No match on a non-empty string means the string could not be processed.
    $nothingMatched = $characterCount === 0 && isset($str[0]);

    return $nothingMatched ? false : $characterCount;
}
7969
|
|
|
|
7970
|
|
|
/**
 * Determine the length of a string in bytes.
 *
 * @param string $str <p>The string being measured.</p>
 *
 * @return int <p>The number of bytes; 0 for an empty string.</p>
 */
public static function strlen_in_byte(string $str): int
{
    if ($str === '') {
        return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    // Without function overloading the native strlen() is used.
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
        return \strlen($str);
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return \mb_strlen($str, 'CP850');
}
7994
|
|
|
|
7995
|
|
|
/**
 * Compare two strings case-insensitively using a "natural order" algorithm.
 *
 * INFO: natural order version of UTF8::strcasecmp(); both strings are
 *       case-folded and then handed to UTF8::strnatcmp()
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
 *             <strong>0</strong> if they are equal
 */
public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
{
    $foldedFirst = self::strtocasefold($str1, true, false, $encoding, null, false);
    $foldedSecond = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($foldedFirst, $foldedSecond);
}
8016
|
|
|
|
8017
|
|
|
/**
 * Compare two strings using a "natural order" algorithm.
 *
 * INFO: natural order version of UTF8::strcmp()
 *
 * @see http://php.net/manual/en/function.strnatcmp.php
 *
 * @param string $str1 <p>The first string.</p>
 * @param string $str2 <p>The second string.</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
 *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strnatcmp(string $str1, string $str2): int
{
    // Identical strings need no folding at all.
    if ($str1 === $str2) {
        return 0;
    }

    $foldedFirst = self::strtonatfold($str1);
    $foldedSecond = self::strtonatfold($str2);

    return \strnatcmp($foldedFirst, $foldedSecond);
}
8036
|
|
|
|
8037
|
|
|
/**
 * Compare the first n characters of two strings, ignoring case.
 *
 * Both strings are case-folded and then handed to UTF8::strncmp().
 *
 * @see http://php.net/manual/en/function.strncasecmp.php
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param int    $len      <p>The length of strings to be used in the comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
 *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strncasecmp(string $str1, string $str2, int $len, string $encoding = 'UTF-8'): int
{
    $foldedFirst = self::strtocasefold($str1, true, false, $encoding, null, false);
    $foldedSecond = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($foldedFirst, $foldedSecond, $len);
}
8060
|
|
|
|
8061
|
|
|
/**
 * Compare the first n characters of two strings.
 *
 * Both strings are cut to $len characters via UTF8::substr() and the
 * results are compared with UTF8::strcmp().
 *
 * @see http://php.net/manual/en/function.strncmp.php
 *
 * @param string $str1 <p>The first string.</p>
 * @param string $str2 <p>The second string.</p>
 * @param int    $len  <p>Number of characters to use in the comparison.</p>
 *
 * @return int
 *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
 *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strncmp(string $str1, string $str2, int $len): int
{
    // The casts turn a failed substr() (false) into an empty string.
    $firstPrefix = (string) self::substr($str1, 0, $len);
    $secondPrefix = (string) self::substr($str2, 0, $len);

    return self::strcmp($firstPrefix, $secondPrefix);
}
8082
|
|
|
|
8083
|
|
|
/**
 * Search a string for any character out of a set of characters.
 *
 * @see http://php.net/manual/en/function.strpbrk.php
 *
 * @param string $haystack  <p>The string where char_list is looked for.</p>
 * @param string $char_list <p>This parameter is case sensitive.</p>
 *
 * @return false|string
 *                      the string starting from the first character found, or false if
 *                      no character is found or one of the arguments is empty
 */
public static function strpbrk(string $haystack, string $char_list)
{
    if ($char_list === '' || $haystack === '') {
        return false;
    }

    // Build a regex from the character list and look for the first hit.
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!\preg_match($pattern, $haystack, $m)) {
        return false;
    }

    // Byte position of the matched character inside the haystack.
    $bytePosition = (int) \strpos($haystack, $m[0]);

    return \substr($haystack, $bytePosition);
}
8105
|
|
|
|
8106
|
|
|
/**
 * Find position of first occurrence of string in a string.
 *
 * The search is attempted with several strategies in turn (mbstring, intl,
 * iconv, plain ASCII, vanilla PHP); the first strategy that finds the
 * needle wins.
 *
 * @see http://php.net/manual/en/function.mb-strpos.php
 *
 * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
 * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
 * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.
 *                              A negative offset counts from the end of the haystack.</p>
 * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|int
 *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
 *                   string.<br> If needle is not found it returns false.
 */
public static function strpos(string $haystack, $needle, int $offset = 0, $encoding = 'UTF-8', bool $cleanUtf8 = false)
{
    if ($haystack === '') {
        return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int) $needle === $needle && $needle >= 0) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    if ($needle === '') {
        return false;
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    //
    // fallback for binary || ascii only
    //

    if (
        $encoding === 'CP850'
        ||
        $encoding === 'ASCII'
    ) {
        return self::strpos_in_byte($haystack, $needle, $offset);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        $returnTmp = \mb_strpos($haystack, $needle, $offset, $encoding);
        if ($returnTmp !== false) {
            return $returnTmp;
        }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_strpos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $returnTmp = \grapheme_strpos($haystack, $needle, $offset);
        if ($returnTmp !== false) {
            return $returnTmp;
        }
    }

    //
    // fallback via iconv
    //

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
        // ignore invalid negative offset to keep compatibility
        // with php < 5.5.35, < 5.6.21, < 7.0.6
        $returnTmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
        if ($returnTmp !== false) {
            return $returnTmp;
        }
    }

    //
    // fallback for ascii only
    //

    if (($haystackIsAscii = self::is_ascii($haystack)) && self::is_ascii($needle)) {
        return \strpos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // A negative offset counts from the end of the haystack. Resolve it to
    // an absolute character position *before* slicing, so that the offset
    // added to the match position below refers to the start of the original
    // haystack (and not to the start of the sliced tail).
    if ($offset < 0) {
        $haystackLength = (int) self::strlen($haystack, $encoding);
        $offset = \max(0, $haystackLength + $offset);
    }

    // Search only in the part of the haystack that starts at the offset.
    if ($haystackIsAscii) {
        $haystackTmp = \substr($haystack, $offset);
    } else {
        $haystackTmp = self::substr($haystack, $offset, null, $encoding);
    }
    if ($haystackTmp === false) {
        $haystackTmp = '';
    }
    $haystack = (string) $haystackTmp;

    // Byte position of the needle inside the sliced haystack.
    $pos = \strpos($haystack, $needle);
    if ($pos === false) {
        return false;
    }

    if ($pos) {
        // Convert the byte position into a character position.
        return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
    }

    return $offset + 0;
}
8256
|
|
|
|
8257
|
|
|
/**
 * Find the byte position of the first occurrence of a string in a string.
 *
 * @param string $haystack <p>
 *                         The string being checked.
 *                         </p>
 * @param string $needle   <p>
 *                         The string to search for.
 *                         </p>
 * @param int    $offset   [optional] <p>
 *                         The search offset. If it is not specified, 0 is used.
 *                         </p>
 *
 * @return false|int The numeric position of the first occurrence of needle in the
 *                   haystack string. If needle is not found or one of the strings
 *                   is empty, it returns false.
 */
public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    // Without function overloading the native strpos() is used.
    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
        return \strpos($haystack, $needle, $offset);
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return \mb_strpos($haystack, $needle, $offset, 'CP850');
}
8290
|
|
|
|
8291
|
|
|
/**
 * Finds the last occurrence of a character in a string within another.
 *
 * @see http://php.net/manual/en/function.mb-strrchr.php
 *
 * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
 * @param string $needle        <p>The string to find in haystack</p>
 * @param bool   $before_needle [optional] <p>
 *                              Determines which portion of haystack
 *                              this function returns.
 *                              If set to true, it returns all of haystack
 *                              from the beginning to the last occurrence of needle.
 *                              If set to false, it returns all of haystack
 *                              from the last occurrence of needle to the end,
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|string the portion of haystack or false if needle is not found
 */
public static function strrchr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    $isByteEncoding = $encoding === 'CP850' || $encoding === 'ASCII';
    if ($before_needle === false && $isByteEncoding) {
        return \strrchr($haystack, $needle);
    }

    //
    // fallback via iconv or vanilla php: both search for the first
    // character of the needle only and differ just in the position lookup
    //

    $firstNeedleChar = self::substr($needle, 0, 1, $encoding);
    if ($firstNeedleChar === false) {
        return false;
    }
    $needle = (string) $firstNeedleChar;

    if (self::$SUPPORT['iconv'] === true) {
        $pos = \iconv_strrpos($haystack, $needle, $encoding);
    } else {
        $pos = self::strrpos($haystack, $needle, null, $encoding);
    }
    if ($pos === false) {
        return false;
    }

    // Either everything in front of the match or everything from the match on.
    return $before_needle
        ? self::substr($haystack, 0, $pos, $encoding)
        : self::substr($haystack, $pos, null, $encoding);
}
8404
|
|
|
|
8405
|
|
|
/**
 * Reverse the order of the characters in a string.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string the string with characters in the reverse sequence
 */
public static function strrev(string $str): string
{
    if ($str === '') {
        return '';
    }

    // A failed length lookup (false) is treated as "no characters".
    $length = (int) self::strlen($str);

    // Walk from the last character to the first and append each one.
    $result = '';
    for ($index = $length - 1; $index >= 0; --$index) {
        $result .= self::substr($str, $index, 1);
    }

    return $result;
}
8426
|
|
|
|
8427
|
|
|
/**
 * Finds the last occurrence of a character in a string within another, case insensitive.
 *
 * @see http://php.net/manual/en/function.mb-strrichr.php
 *
 * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
 * @param string $needle        <p>The string to find in haystack.</p>
 * @param bool   $before_needle [optional] <p>
 *                              Determines which portion of haystack
 *                              this function returns.
 *                              If set to true, it returns all of haystack
 *                              from the beginning to the last occurrence of needle.
 *                              If set to false, it returns all of haystack
 *                              from the last occurrence of needle to the end,
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|string the portion of haystack or<br>false if needle is not found
 */
public static function strrichr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        return \mb_strrichr($haystack, $needle, $before_needle, $encoding);
    }

    //
    // fallback via vanilla php: search for the first character of the needle only
    //

    $firstNeedleChar = self::substr($needle, 0, 1, $encoding);
    if ($firstNeedleChar === false) {
        return false;
    }
    $needle = (string) $firstNeedleChar;

    $lastPosition = self::strripos($haystack, $needle, 0, $encoding);
    if ($lastPosition === false) {
        return false;
    }

    // Either everything in front of the match or everything from the match on.
    return $before_needle
        ? self::substr($haystack, 0, $lastPosition, $encoding)
        : self::substr($haystack, $lastPosition, null, $encoding);
}
8497
|
|
|
|
8498
|
|
|
/**
 * Find the position of the last occurrence of a string, ignoring case.
 *
 * The first usable strategy wins: byte-wise search for "CP850" / "ASCII",
 * mbstring, intl (UTF-8 with a non-negative offset only), byte-wise search
 * for pure ASCII input, and finally a case-folded search via self::strrpos().
 *
 * @param string     $haystack  <p>The string to look in.</p>
 * @param int|string $needle    <p>The string to look for.<br>A non-negative int is converted via self::chr().</p>
 * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
 * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|int
 *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
 *                   string.<br>If needle is not found (or haystack / needle is empty), it returns false.
 */
public static function strripos(string $haystack, $needle, int $offset = 0, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
{
    if ($haystack === '') {
        return false;
    }

    // iconv and mbstring do not support an integer $needle
    if ((int) $needle === $needle && $needle >= 0) {
        $needle = (string) self::chr($needle);
    }

    $needle = (string) $needle;
    if ($needle === '') {
        return false;
    }

    if ($cleanUtf8 === true) {
        // mb_strripos() and iconv_strripos() are not tolerant to invalid characters
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // fallback for binary || ascii only
    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return self::strripos_in_byte($haystack, $needle, $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    // fallback via mbstring
    if (self::$SUPPORT['mbstring'] === true) {
        return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // from here on only UTF-8 can be handled properly
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // fallback via intl: "grapheme_strripos()" can't handle other encodings or a negative offset
    $intlIsUsable = $encoding === 'UTF-8'
                    &&
                    $offset >= 0
                    &&
                    self::$SUPPORT['intl'] === true;
    if ($intlIsUsable) {
        $graphemePos = \grapheme_strripos($haystack, $needle, $offset);
        if ($graphemePos !== false) {
            return $graphemePos;
        }
    }

    // fallback for ascii only
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
        return self::strripos_in_byte($haystack, $needle, $offset);
    }

    // fallback via vanilla php: case-fold both strings, then search case-sensitively
    $foldedHaystack = self::strtocasefold($haystack, true, false, $encoding);
    $foldedNeedle = self::strtocasefold($needle, true, false, $encoding);

    return self::strrpos($foldedHaystack, $foldedNeedle, $offset, $encoding, $cleanUtf8);
}
8603
|
|
|
|
8604
|
|
|
/**
 * Byte-wise variant of strripos(): find the position of the last occurrence
 * of a string within another, case insensitive.
 *
 * @param string $haystack <p>
 *                         The string from which to get the position of the last occurrence
 *                         of needle.
 *                         </p>
 * @param string $needle   <p>
 *                         The string to find in haystack.
 *                         </p>
 * @param int    $offset   [optional] <p>
 *                         The position in haystack
 *                         to start searching.
 *                         </p>
 *
 * @return false|int the numeric (byte) position of the last occurrence of needle in the
 *                   haystack string, or false if needle is not found or one of the strings is empty
 */
public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strripos($haystack, $needle, $offset, 'CP850')
        : \strripos($haystack, $needle, $offset);
}
8639
|
|
|
|
8640
|
|
|
/**
 * Find position of last occurrence of a string in a string.
 *
 * The first usable strategy wins: byte-wise search for "CP850" / "ASCII",
 * mbstring, intl (UTF-8 with an explicit, non-negative offset only), byte-wise
 * search for pure ASCII input (explicit offset only) and finally a vanilla-php fallback.
 *
 * @see http://php.net/manual/en/function.mb-strrpos.php
 *
 * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
 * @param int|string $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
 * @param int|null   $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
 *                              into the string. Negative values will stop searching at an arbitrary point prior to
 *                              the end of the string.<br><b>NULL</b> (default) is handled like 0 by the
 *                              byte-wise and mbstring code paths.
 *                              </p>
 * @param string     $encoding  [optional] <p>Set the charset.</p>
 * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|int
 *                   The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
 *                   string.<br>If needle is not found, it returns false.
 */
public static function strrpos(string $haystack, $needle, int $offset = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false)
{
    if ($haystack === '') {
        return false;
    }

    // iconv and mbstring do not support integer $needle
    if ((int) $needle === $needle && $needle >= 0) {
        $needle = (string) self::chr($needle);
    }
    $needle = (string) $needle;

    if ($needle === '') {
        return false;
    }

    if ($cleanUtf8 === true) {
        // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        // strrpos_in_byte() declares a non-nullable "int $offset", so the default (null)
        // must be converted, otherwise a TypeError is thrown
        return self::strrpos_in_byte($haystack, $needle, (int) $offset);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        // with strict types, "null" is not accepted for the int $offset of mb_strrpos() on PHP 8
        return \mb_strrpos($haystack, $needle, (int) $offset, $encoding);
    }

    //
    // fallback via intl
    //

    if (
        $offset !== null
        &&
        $offset >= 0 // grapheme_strrpos() can't handle negative offset
        &&
        $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $returnTmp = \grapheme_strrpos($haystack, $needle, $offset);
        if ($returnTmp !== false) {
            return $returnTmp;
        }
    }

    //
    // fallback for ascii only
    //

    if (
        $offset !== null
        &&
        self::is_ascii($haystack)
        &&
        self::is_ascii($needle)
    ) {
        return self::strrpos_in_byte($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php
    //

    // from here on "no offset" and "offset 0" are the same thing
    $offset = (int) $offset;

    // cut the haystack down to the searched area:
    // positive offset => skip the first chars, negative offset => drop the last chars
    $haystackTmp = null;
    if ($offset > 0) {
        $haystackTmp = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
        $haystackTmp = self::substr($haystack, 0, $offset);
        $offset = 0;
    }

    if ($haystackTmp !== null) {
        if ($haystackTmp === false) {
            $haystackTmp = '';
        }
        $haystack = (string) $haystackTmp;
    }

    $pos = self::strrpos_in_byte($haystack, $needle);
    if ($pos === false) {
        return false;
    }

    // convert the byte position into a character position (+ the skipped chars)
    return $offset + (int) self::strlen(self::substr_in_byte($haystack, 0, $pos));
}
8775
|
|
|
|
8776
|
|
|
/**
 * Byte-wise variant of strrpos(): find position of last occurrence of a string in a string.
 *
 * @param string $haystack <p>
 *                         The string being checked, for the last occurrence
 *                         of needle.
 *                         </p>
 * @param string $needle   <p>
 *                         The string to find in haystack.
 *                         </p>
 * @param int    $offset   [optional] May be specified to begin searching an arbitrary number of characters into
 *                         the string. Negative values will stop searching at an arbitrary point
 *                         prior to the end of the string.
 *
 * @return false|int The numeric (byte) position of the last occurrence of needle in the
 *                   haystack string. If needle is not found or one of the strings is empty, it returns false.
 */
public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
        : \strrpos($haystack, $needle, $offset);
}
8810
|
|
|
|
8811
|
|
|
/**
 * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
 * mask.
 *
 * @param string   $str    <p>The input string.</p>
 * @param string   $mask   <p>The mask of chars</p>
 * @param int      $offset [optional] <p>If non-zero, only the sub-string starting here is examined.</p>
 * @param int|null $length [optional] <p>If given, only a sub-string of this length is examined.</p>
 *
 * @return int <p>Length of the matching prefix, 0 if nothing matches or an input is empty.</p>
 */
public static function strspn(string $str, string $mask, int $offset = 0, int $length = null): int
{
    // reduce the input to the requested window first
    if ($offset !== 0 || $length !== null) {
        $window = self::substr($str, $offset, $length);
        $str = $window === false ? '' : (string) $window;
    }

    if ($mask === '' || $str === '') {
        return 0;
    }

    $matches = [];
    if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
        return 0;
    }

    return self::strlen($matches[0]);
}
8838
|
|
|
|
8839
|
|
|
/**
 * Returns part of haystack string from the first occurrence of needle to the end of haystack.
 *
 * The first usable strategy wins: byte-wise search for "CP850" / "ASCII",
 * mbstring, intl (UTF-8 only), byte-wise search for pure ASCII input and
 * finally a regex based fallback.
 *
 * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
 * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
 * @param bool   $before_needle [optional] <p>
 *                              If <b>TRUE</b>, strstr() returns the part of the
 *                              haystack before the first occurrence of the needle (excluding the needle).
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|string
 *                      A sub-string,<br>or <strong>false</strong> if needle is not found
 *                      (or haystack / needle is empty)
 */
public static function strstr(string $haystack, string $needle, bool $before_needle = false, string $encoding = 'UTF-8', $cleanUtf8 = false)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // fallback for binary || ascii only
    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    // fallback via mbstring
    if (self::$SUPPORT['mbstring'] === true) {
        return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // from here on only UTF-8 can be handled properly
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
        \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // fallback via intl: "grapheme_strstr()" can't handle other encodings
    if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
        $graphemeResult = \grapheme_strstr($haystack, $needle, $before_needle);
        if ($graphemeResult !== false) {
            return $graphemeResult;
        }
    }

    // fallback for ascii only
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
        return self::strstr_in_byte($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: capture (non-greedy) everything in front of the first needle
    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
        return false;
    }

    $prefix = $match[1];

    return $before_needle
        ? $prefix
        : self::substr($haystack, self::strlen($prefix));
}
8942
|
|
|
|
8943
|
|
|
/**
 * Byte-wise variant of strstr(): finds first occurrence of a string within another.
 *
 * @param string $haystack      <p>
 *                              The string from which to get the first occurrence
 *                              of needle.
 *                              </p>
 * @param string $needle        <p>
 *                              The string to find in haystack.
 *                              </p>
 * @param bool   $before_needle [optional] <p>
 *                              Determines which portion of haystack
 *                              this function returns.
 *                              If set to true, it returns all of haystack
 *                              from the beginning to the first occurrence of needle.
 *                              If set to false, it returns all of haystack
 *                              from the first occurrence of needle to the end.
 *                              </p>
 *
 * @return false|string the portion of haystack,
 *                      or false if needle is not found or one of the strings is empty
 */
public static function strstr_in_byte(string $haystack, string $needle, bool $before_needle = false)
{
    if ($needle === '' || $haystack === '') {
        return false;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
        : \strstr($haystack, $needle, $before_needle);
}
8982
|
|
|
|
8983
|
|
|
/**
 * Unicode transformation for case-less matching.
 *
 * The string is first passed through self::fixStrCaseHelper() and then
 * converted via self::strtolower() or self::strtoupper(), depending on $lower.
 *
 * @see http://unicode.org/reports/tr21/tr21-5.html
 *
 * @param string      $str       <p>The input string.</p>
 * @param bool        $full      [optional] <p>
 *                               <b>true</b>, replace full case folding chars (default)<br>
 *                               <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
 *                               </p>
 * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string      $encoding  [optional] <p>Set the charset.</p>
 * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $lower     [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
 *                               is for some languages better ...</p>
 *
 * @return string
 */
public static function strtocasefold(
    string $str,
    bool $full = true,
    bool $cleanUtf8 = false,
    string $encoding = 'UTF-8',
    string $lang = null,
    $lower = true
): string {
    if ($str === '') {
        return '';
    }

    $prepared = self::fixStrCaseHelper($str, $lower, $full);

    // only a strict "true" selects the lowercase variant
    return $lower === true
        ? self::strtolower($prepared, $encoding, $cleanUtf8, $lang)
        : self::strtoupper($prepared, $encoding, $cleanUtf8, $lang);
}
9021
|
|
|
|
9022
|
|
|
/**
 * Make a string lowercase.
 *
 * If a language is given and intl is available, the conversion is done via the
 * "<lang>-Lower" transliterator ("Any-Lower" if that id is unknown), otherwise via mb_strtolower().
 *
 * @see http://php.net/manual/en/function.mb-strtolower.php
 *
 * @param string      $str                   <p>The string being lowercased.</p>
 * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string string with all alphabetic characters converted to lowercase
 */
public static function strtolower($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
{
    $str = (string) $str;
    if ($str === '') {
        return '';
    }

    if ($cleanUtf8 === true) {
        // remove invalid byte sequences before the string is converted
        $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
        $str = self::fixStrCaseHelper($str, true);
    }

    if ($lang !== null) {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intl'] !== true) {
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        $transliteratorId = $lang . '-Lower';
        if (!\in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang, \E_USER_WARNING);

            // unknown language => use the generic transliterator
            $transliteratorId = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \transliterator_transliterate($transliteratorId, $str);
    }

    // always fallback via symfony polyfill
    return \mb_strtolower($str, $encoding);
}
9082
|
|
|
|
9083
|
|
|
/**
 * Generic case sensitive transformation for collation matching.
 *
 * The string is decomposed (NFD) and all non-spacing marks (\p{Mn}) are removed.
 * If the normalization or the regex fails (e.g. for invalid UTF-8), the
 * un-transformed string is used instead of violating the "string" return type.
 *
 * @param string $str <p>The input string</p>
 *
 * @return string
 */
private static function strtonatfold(string $str): string
{
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);
    if ($decomposed === false) {
        // Normalizer::normalize() returns false on failure; passing that on would throw a TypeError
        $decomposed = $str;
    }

    // preg_replace() returns null on error (e.g. malformed UTF-8 with the "u" modifier)
    $withoutMarks = \preg_replace('/\p{Mn}+/u', '', $decomposed);

    return $withoutMarks ?? $decomposed;
}
9095
|
|
|
|
9096
|
|
|
/**
 * Make a string uppercase.
 *
 * If a language is given and intl is available, the conversion is done via the
 * "<lang>-Upper" transliterator ("Any-Upper" if that id is unknown), otherwise via mb_strtoupper().
 *
 * @see http://php.net/manual/en/function.mb-strtoupper.php
 *
 * @param string      $str                   <p>The string being uppercased.</p>
 * @param string      $encoding              [optional] <p>Set the charset.</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string string with all alphabetic characters converted to uppercase
 */
public static function strtoupper($str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
{
    // init
    $str = (string) $str;

    if ($str === '') {
        return '';
    }

    if ($cleanUtf8 === true) {
        // remove invalid byte sequences before the string is converted
        $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // hack for old php version or for the polyfill ...
    if ($tryToKeepStringLength === true) {
        $str = self::fixStrCaseHelper($str, false);
    }

    if ($lang !== null) {
        if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            self::checkForSupport();
        }

        if (self::$SUPPORT['intl'] === true) {
            $langCode = $lang . '-Upper';
            if (!\in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
                // intl is available here, only the language specific transliterator is missing
                \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang, \E_USER_WARNING);

                $langCode = 'Any-Upper';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return \transliterator_transliterate($langCode, $str);
        }

        \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
    }

    // always fallback via symfony polyfill
    return \mb_strtoupper($str, $encoding);
}
9156
|
|
|
|
9157
|
|
|
/**
 * Translate characters or replace sub-strings.
 *
 * With $to given, $from and $to are split via self::str_split(), cut to the same
 * number of elements and used as a translation map. Without $to, an array $from
 * is used as map directly and a string $from is removed from $str.
 *
 * @see http://php.net/manual/en/function.strtr.php
 *
 * @param string          $str  <p>The string being translated.</p>
 * @param string|string[] $from <p>The string replacing from.</p>
 * @param string|string[] $to   <p>The string being translated to.</p>
 *
 * @return string
 *                This function returns a copy of str, translating all occurrences of each character in from to the
 *                corresponding character in to
 */
public static function strtr(string $str, $from, $to = \INF): string
{
    // nothing to translate
    if ($str === '') {
        return '';
    }
    if ($from === $to) {
        return $str;
    }

    // \INF is the marker for "no $to given"
    if ($to !== \INF) {
        $fromChars = self::str_split($from);
        $toChars = self::str_split($to);
        $fromCount = \count($fromChars);
        $toCount = \count($toChars);

        // both lists must have the same size for array_combine()
        if ($fromCount > $toCount) {
            $fromChars = \array_slice($fromChars, 0, $toCount);
        } elseif ($fromCount < $toCount) {
            $toChars = \array_slice($toChars, 0, $fromCount);
        }

        $from = \array_combine($fromChars, $toChars);
    }

    return \is_string($from)
        ? \str_replace($from, '', $str)
        : \strtr($str, $from);
}
9201
|
|
|
|
9202
|
|
|
/**
 * Return the width of a string.
 *
 * Uses mb_strwidth() if mbstring is available. Otherwise the characters of the
 * hard-coded "wide" ranges count as 2 and every other character as 1.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return int
 */
public static function strwidth(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): int
{
    if ($str === '') {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
        // iconv and mbstring are not tolerant to invalid encoding
        // further, their behaviour is inconsistent with that of PHP's substr
        $str = self::clean($str);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    // fallback via mbstring
    if (self::$SUPPORT['mbstring'] === true) {
        return \mb_strwidth($str, $encoding);
    }

    // fallback via vanilla php (works on UTF-8 only)
    if ($encoding !== 'UTF-8') {
        $str = self::encode('UTF-8', $str, false, $encoding);
    }

    // strip the wide chars and let preg_replace() count them
    $wideCount = 0;
    $narrowOnly = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wideCount);

    // every wide char counts twice
    return ($wideCount * 2) + self::strlen($narrowOnly, 'UTF-8');
}
9252
|
|
|
|
9253
|
|
|
/** |
9254
|
|
|
* Get part of a string. |
9255
|
|
|
* |
9256
|
|
|
* @see http://php.net/manual/en/function.mb-substr.php |
9257
|
|
|
* |
9258
|
|
|
* @param string $str <p>The string being checked.</p> |
9259
|
|
|
* @param int $offset <p>The first position used in str.</p> |
9260
|
|
|
* @param int $length [optional] <p>The maximum length of the returned string.</p> |
9261
|
|
|
* @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> |
9262
|
|
|
* @param bool $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p> |
9263
|
|
|
* |
9264
|
|
|
* @return false|string |
9265
|
|
|
* The portion of <i>str</i> specified by the <i>offset</i> and |
9266
|
|
|
* <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i> |
9267
|
|
|
* characters long, <b>FALSE</b> will be returned. |
9268
|
|
|
*/ |
9269
|
401 |
|
public static function substr(string $str, int $offset = 0, int $length = null, string $encoding = 'UTF-8', bool $cleanUtf8 = false) |
9270
|
|
|
{ |
9271
|
401 |
|
if ($str === '') { |
9272
|
26 |
|
return ''; |
9273
|
|
|
} |
9274
|
|
|
|
9275
|
|
|
// Empty string |
9276
|
396 |
|
if ($length === 0) { |
9277
|
20 |
|
return ''; |
9278
|
|
|
} |
9279
|
|
|
|
9280
|
393 |
|
if ($cleanUtf8 === true) { |
9281
|
|
|
// iconv and mbstring are not tolerant to invalid encoding |
9282
|
|
|
// further, their behaviour is inconsistent with that of PHP's substr |
9283
|
2 |
|
$str = self::clean($str); |
9284
|
|
|
} |
9285
|
|
|
|
9286
|
|
|
// Whole string |
9287
|
393 |
|
if (!$offset && $length === null) { |
9288
|
40 |
|
return $str; |
9289
|
|
|
} |
9290
|
|
|
|
9291
|
364 |
|
if ($encoding !== 'UTF-8' && $encoding !== 'CP850') { |
9292
|
161 |
|
$encoding = self::normalize_encoding($encoding, 'UTF-8'); |
9293
|
|
|
} |
9294
|
|
|
|
9295
|
364 |
|
if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) { |
9296
|
|
|
self::checkForSupport(); |
9297
|
|
|
} |
9298
|
|
|
|
9299
|
|
|
// |
9300
|
|
|
// fallback for binary || ascii only |
9301
|
|
|
// |
9302
|
|
|
|
9303
|
|
|
if ( |
9304
|
364 |
|
$encoding === 'CP850' |
9305
|
|
|
|| |
9306
|
364 |
|
$encoding === 'ASCII' |
9307
|
|
|
) { |
9308
|
2 |
|
return self::substr_in_byte($str, $offset, $length); |
9309
|
|
|
} |
9310
|
|
|
|
9311
|
|
|
// |
9312
|
|
|
// fallback via mbstring |
9313
|
|
|
// |
9314
|
|
|
|
9315
|
362 |
|
if (self::$SUPPORT['mbstring'] === true) { |
9316
|
362 |
|
$return = \mb_substr($str, $offset, $length ?? 2147483647, $encoding); |
9317
|
362 |
|
if ($return !== false) { |
9318
|
362 |
|
return $return; |
9319
|
|
|
} |
9320
|
|
|
} |
9321
|
|
|
|
9322
|
|
|
// otherwise we need the string-length and can't fake it via "2147483647" |
9323
|
4 |
|
$str_length = 0; |
9324
|
4 |
|
if ($offset || $length === null) { |
9325
|
4 |
|
$str_length = self::strlen($str, $encoding); |
9326
|
|
|
} |
9327
|
|
|
|
9328
|
|
|
// e.g.: invalid chars + mbstring not installed |
9329
|
4 |
|
if ($str_length === false) { |
9330
|
|
|
return false; |
9331
|
|
|
} |
9332
|
|
|
|
9333
|
|
|
// Empty string |
9334
|
4 |
|
if ($offset === $str_length && !$length) { |
|
|
|
|
9335
|
|
|
return ''; |
9336
|
|
|
} |
9337
|
|
|
|
9338
|
|
|
// Impossible |
9339
|
4 |
|
if ($offset && $offset > $str_length) { |
9340
|
|
|
// "false" is the php native return type here, |
9341
|
|
|
// but we optimized this for performance ... see "2147483647" instead of "strlen" |
9342
|
|
|
return ''; |
9343
|
|
|
} |
9344
|
|
|
|
9345
|
4 |
|
if ($length === null) { |
9346
|
4 |
|
$length = (int) $str_length; |
9347
|
|
|
} else { |
9348
|
2 |
|
$length = (int) $length; |
9349
|
|
|
} |
9350
|
|
|
|
9351
|
|
|
if ( |
9352
|
4 |
|
$encoding !== 'UTF-8' |
9353
|
|
|
&& |
9354
|
4 |
|
self::$SUPPORT['mbstring'] === false |
9355
|
|
|
) { |
9356
|
2 |
|
\trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING); |
9357
|
|
|
} |
9358
|
|
|
|
9359
|
|
|
// |
9360
|
|
|
// fallback via intl |
9361
|
|
|
// |
9362
|
|
|
|
9363
|
|
|
if ( |
9364
|
4 |
|
$encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings |
9365
|
|
|
&& |
9366
|
4 |
|
$offset >= 0 // grapheme_substr() can't handle negative offset |
9367
|
|
|
&& |
9368
|
4 |
|
self::$SUPPORT['intl'] === true |
9369
|
|
|
) { |
9370
|
|
|
$returnTmp = \grapheme_substr($str, $offset, $length); |
9371
|
|
|
if ($returnTmp !== false) { |
9372
|
|
|
return $returnTmp; |
9373
|
|
|
} |
9374
|
|
|
} |
9375
|
|
|
|
9376
|
|
|
// |
9377
|
|
|
// fallback via iconv |
9378
|
|
|
// |
9379
|
|
|
|
9380
|
|
|
if ( |
9381
|
4 |
|
$length >= 0 // "iconv_substr()" can't handle negative length |
9382
|
|
|
&& |
9383
|
4 |
|
self::$SUPPORT['iconv'] === true |
9384
|
|
|
) { |
9385
|
|
|
$returnTmp = \iconv_substr($str, $offset, $length); |
9386
|
|
|
if ($returnTmp !== false) { |
9387
|
|
|
return $returnTmp; |
9388
|
|
|
} |
9389
|
|
|
} |
9390
|
|
|
|
9391
|
|
|
// |
9392
|
|
|
// fallback for ascii only |
9393
|
|
|
// |
9394
|
|
|
|
9395
|
4 |
|
if (self::is_ascii($str)) { |
9396
|
|
|
return \substr($str, $offset, $length); |
9397
|
|
|
} |
9398
|
|
|
|
9399
|
|
|
// |
9400
|
|
|
// fallback via vanilla php |
9401
|
|
|
// |
9402
|
|
|
|
9403
|
|
|
// split to array, and remove invalid characters |
9404
|
4 |
|
$array = self::split($str); |
9405
|
|
|
|
9406
|
|
|
// extract the relevant part, and join it to make a string again
9407
|
4 |
|
return \implode('', \array_slice($array, $offset, $length)); |
9408
|
|
|
} |
9409
|
|
|
|
9410
|
|
|
/**
 * Binary safe comparison of two strings from an offset, up to length characters.
 *
 * When an offset or a length is given, $str1 is first reduced to the requested
 * part and $str2 is cut to the same number of characters; the two results are
 * then compared via self::strcmp() or self::strcasecmp().
 *
 * @param string   $str1               <p>The main string being compared.</p>
 * @param string   $str2               <p>The secondary string being compared.</p>
 * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
 *                                     counting from the end of the string.</p>
 * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
 *                                     of the length of the str compared to the length of main_str less the
 *                                     offset.</p>
 * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
 *                                     insensitive.</p>
 *
 * @return int
 *             <strong>< 0</strong> if str1 is less than str2;<br>
 *             <strong>> 0</strong> if str1 is greater than str2,<br>
 *             <strong>0</strong> if they are equal
 */
public static function substr_compare(string $str1, string $str2, int $offset = 0, int $length = null, bool $case_insensitivity = false): int
{
    if ($offset !== 0 || $length !== null) {
        // a failed substr() (false) is treated as an empty string
        $str1 = (string) self::substr($str1, $offset, $length);

        // self::strlen() can return false; cast it so that an integer length
        // is always handed over (strict_types would otherwise raise a TypeError)
        $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1));
    }

    if ($case_insensitivity === true) {
        return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
}
9454
|
|
|
|
9455
|
|
|
/**
 * Count the number of substring occurrences.
 *
 * @see http://php.net/manual/en/function.substr-count.php
 *
 * @param string   $haystack  <p>The string to search in.</p>
 * @param string   $needle    <p>The substring to search for.</p>
 * @param int      $offset    [optional] <p>The offset where to start counting.</p>
 * @param int|null $length    [optional] <p>
 *                            The maximum length after the specified offset to search for the
 *                            substring. If omitted, the length of the haystack (measured in
 *                            $encoding) is used.
 *                            </p>
 * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return false|int
 *                   the number of occurrences, or false if the haystack or the needle is empty
 *                   or the length of the haystack cannot be determined
 */
public static function substr_count(
    string $haystack,
    string $needle,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8',
    bool $cleanUtf8 = false
) {
    if ($haystack === '' || $needle === '') {
        return false;
    }

    // normalize first, so that every helper below sees the same encoding name
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($offset || $length !== null) {
        if ($length === null) {
            // measure the haystack in the requested encoding, not in the default one
            $lengthTmp = self::strlen($haystack, $encoding);
            if ($lengthTmp === false) {
                return false;
            }
            $length = (int) $lengthTmp;
        }

        if (
            (
                $length !== 0
                &&
                $offset !== 0
            )
            &&
            ($length + $offset) <= 0
            &&
            Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
        ) {
            return false;
        }

        // a failed substr() (false) is treated as an empty haystack
        $haystack = (string) self::substr($haystack, $offset, $length, $encoding);
    }

    if ($cleanUtf8 === true) {
        // strip invalid characters from both sides before counting
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);

        // cleaning may have removed the whole needle; treat that like an empty needle on entry
        if ($needle === '') {
            return false;
        }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
        return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback without mbstring: count the regex matches of the quoted needle
    \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

    return \count($matches);
}
9546
|
|
|
|
9547
|
|
|
/**
 * Count the number of substring occurrences, working on bytes.
 *
 * @param string   $haystack <p>
 *                           The string being checked.
 *                           </p>
 * @param string   $needle   <p>
 *                           The string being found.
 *                           </p>
 * @param int      $offset   [optional] <p>
 *                           The offset where to start counting
 *                           </p>
 * @param int|null $length   [optional] <p>
 *                           The maximum length after the specified offset to search for the
 *                           substring. If omitted, the search runs to the end of the haystack.
 *                           </p>
 *
 * @return false|int
 *                   the number of times the needle substring occurs in the haystack string;
 *                   0 if the haystack or the needle is empty
 */
public static function substr_count_in_byte(string $haystack, string $needle, int $offset = 0, int $length = null)
{
    if ($haystack === '' || $needle === '') {
        return 0;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring_func_overload'] === true) {
        if ($offset || $length !== null) {
            if ($length === null) {
                // NOTE(review): self::strlen() is called without an encoding here, so this is
                // presumably a character count rather than a byte count - confirm
                $lengthTmp = self::strlen($haystack);
                if ($lengthTmp === false) {
                    return false;
                }
                $length = (int) $lengthTmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            // a failed substr_in_byte() (false) is treated as an empty haystack
            $haystack = (string) self::substr_in_byte($haystack, $offset, $length);
        }

        // "mb_" is available if overload is used, so use it ...
        return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
    }

    // never hand null to the native function: it is rejected under strict_types
    // on PHP versions where the length parameter is not nullable
    if ($length === null) {
        return \substr_count($haystack, $needle, $offset);
    }

    return \substr_count($haystack, $needle, $offset, $length);
}
9620
|
|
|
|
9621
|
|
|
/**
 * Counts how often $substring occurs in $str.
 *
 * The search is case-sensitive unless $caseSensitive is false, in which case
 * both strings are case-folded before counting.
 *
 * @param string      $str           <p>The input string.</p>
 * @param string      $substring     <p>The substring to search for.</p>
 * @param bool|string $caseSensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true<br>
 *                                   For backward compatibility a non-boolean value is used as the encoding.</p>
 * @param string      $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return int the number of occurrences, 0 if one of the strings is empty
 */
public static function substr_count_simple(string $str, string $substring, $caseSensitive = true, string $encoding = 'UTF-8'): int
{
    $nothingToCount = ($str === '' || $substring === '');
    if ($nothingToCount) {
        return 0;
    }

    // backward compatibility of the api: anything that is not a boolean
    // in the third position is taken as the encoding
    if (\is_bool($caseSensitive) === false) {
        $encoding = (string) $caseSensitive;
    }

    if (!$caseSensitive) {
        $str = self::strtocasefold($str, true, false, $encoding, null, false);
        $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
    }

    $occurrences = self::substr_count($str, $substring, 0, null, $encoding);

    return (int) $occurrences;
}
9651
|
|
|
|
9652
|
|
|
/**
 * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return string
 *                the haystack without the prefix, or the unchanged haystack
 *                if it does not start with the needle
 */
public static function substr_ileft(string $haystack, string $needle): string
{
    // an empty haystack or an empty needle leaves nothing to remove
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
        return $haystack;
    }

    // self::strlen() can return false; cast it so that an integer offset
    // is always handed over (strict_types would otherwise raise a TypeError)
    $prefixLength = (int) self::strlen($needle);

    // a failed substr() (false) is treated as an empty string
    return (string) self::substr($haystack, $prefixLength);
}
9680
|
|
|
|
9681
|
|
|
/**
 * Returns a part of a string, with offset and length counted in bytes.
 *
 * @param string   $str    <p>The string being checked.</p>
 * @param int      $offset <p>The first position used in str.</p>
 * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
 *
 * @return false|string
 *                      The portion of <i>str</i> specified by the <i>offset</i> and
 *                      <i>length</i> parameters. Whatever the underlying "substr()" /
 *                      "mb_substr()" call returns is passed through unchanged.
 */
public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
{
    // nothing to cut from, or nothing requested
    if ($str === '' || $length === 0) {
        return '';
    }

    // no offset and no limit: the whole string
    if ($length === null && !$offset) {
        return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    // a missing length is replaced by a very large one
    $maxLength = $length ?? 2147483647;

    // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
    return self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_substr($str, $offset, $maxLength, 'CP850')
        : \substr($str, $offset, $maxLength);
}
9720
|
|
|
|
9721
|
|
|
/**
 * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return string
 *                the haystack without the suffix, or the unchanged haystack
 *                if it does not end with the needle
 */
public static function substr_iright(string $haystack, string $needle): string
{
    // an empty haystack or an empty needle leaves nothing to remove
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
        return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keepLength);

    return $head === false ? '' : (string) $head;
}
9749
|
|
|
|
9750
|
|
|
/**
 * Removes a prefix ($needle) from the start of the string ($haystack).
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return string
 *                the haystack without the prefix, or the unchanged haystack
 *                if it does not start with the needle
 */
public static function substr_left(string $haystack, string $needle): string
{
    // an empty haystack or an empty needle leaves nothing to remove
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
        return $haystack;
    }

    // self::strlen() can return false; cast it so that an integer offset
    // is always handed over (strict_types would otherwise raise a TypeError)
    $prefixLength = (int) self::strlen($needle);

    // a failed substr() (false) is treated as an empty string
    return (string) self::substr($haystack, $prefixLength);
}
9778
|
|
|
|
9779
|
|
|
/**
 * Replace text within a portion of a string.
 *
 * source: https://gist.github.com/stemar/8287074
 *
 * If $str is an array, every element is processed on its own (with the matching
 * element of $replacement / $offset / $length, or the scalar value repeated) and
 * an array is returned. The given $encoding is used for every element.
 *
 * @param string|string[] $str         <p>The input string or an array of strings.</p>
 * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
 * @param int|int[]       $offset      <p>
 *                                     If start is positive, the replacing will begin at the start'th offset
 *                                     into string.
 *                                     <br><br>
 *                                     If start is negative, the replacing will begin at the start'th character
 *                                     from the end of string.
 *                                     </p>
 * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
 *                                     portion of string which is to be replaced. If it is negative, it
 *                                     represents the number of characters from the end of string at which to
 *                                     stop replacing. If it is not given and $str is a string, the replacing
 *                                     ends at the end of string. Of course, if length is zero then this
 *                                     function will have the effect of inserting replacement into string at
 *                                     the given start offset.<br>
 *                                     NOTE: if $str is an array and no length is given, a length of 0
 *                                     (insert) is used for every element.</p>
 * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @return string|string[] The result string is returned. If string is an array then array is returned.
 */
public static function substr_replace($str, $replacement, $offset, $length = null, string $encoding = 'UTF-8')
{
    if (\is_array($str) === true) {
        $num = \count($str);

        // the replacement: one entry per input string
        if (\is_array($replacement) === true) {
            $replacement = \array_slice($replacement, 0, $num);
        } else {
            $replacement = \array_pad([$replacement], $num, $replacement);
        }

        // the offset: non-integer entries fall back to 0
        if (\is_array($offset) === true) {
            $offset = \array_slice($offset, 0, $num);
            foreach ($offset as &$valueTmp) {
                $valueTmp = (int) $valueTmp === $valueTmp ? $valueTmp : 0;
            }
            unset($valueTmp);
        } else {
            $offset = \array_pad([$offset], $num, $offset);
        }

        // the length: null entries become 0, other non-integer entries become $num
        if ($length === null) {
            $length = \array_fill(0, $num, 0);
        } elseif (\is_array($length) === true) {
            $length = \array_slice($length, 0, $num);
            foreach ($length as &$valueTmpV2) {
                if ($valueTmpV2 !== null) {
                    $valueTmpV2 = (int) $valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
                } else {
                    $valueTmpV2 = 0;
                }
            }
            unset($valueTmpV2);
        } else {
            $length = \array_pad([$length], $num, $length);
        }

        // recursive call, element by element; the closure forwards the requested
        // encoding, which a plain callable would silently reset to the default
        return \array_map(
            static function ($strTmp, $replacementTmp, $offsetTmp, $lengthTmp) use ($encoding) {
                return self::substr_replace($strTmp, $replacementTmp, $offsetTmp, $lengthTmp, $encoding);
            },
            $str,
            $replacement,
            $offset,
            $length
        );
    }

    // a replacement array for a single string: only its first element is used
    if (\is_array($replacement) === true) {
        if (\count($replacement) > 0) {
            $replacement = $replacement[0];
        } else {
            $replacement = '';
        }
    }

    // init
    $str = (string) $str;
    $replacement = (string) $replacement;

    if ($str === '') {
        return $replacement;
    }

    // ASCII only: the native byte-based function is sufficient
    if (self::is_ascii($str)) {
        return ($length === null) ?
            \substr_replace($str, $replacement, $offset) :
            \substr_replace($str, $replacement, $offset, $length);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
        $string_length = self::strlen($str, $encoding);

        // clamp the offset into [0, string length]
        if ($offset < 0) {
            $offset = \max(0, $string_length + $offset);
        } elseif ($offset > $string_length) {
            $offset = $string_length;
        }

        // a negative length counts from the end, a missing one means "to the end"
        if ($length < 0) {
            $length = \max(0, $string_length - $offset + $length);
        } elseif ($length === null || $length > $string_length) {
            $length = $string_length;
        }

        if (($offset + $length) > $string_length) {
            $length = $string_length - $offset;
        }

        // part before the replaced range + replacement + part after it
        return self::substr($str, 0, $offset, $encoding) . $replacement . self::substr($str, $offset + $length, $string_length - $offset - $length, $encoding);
    }

    // fallback without mbstring: split both strings into characters and splice
    \preg_match_all('/./us', $str, $smatches);
    \preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
        $lengthTmp = self::strlen($str, $encoding);
        if ($lengthTmp === false) {
            // e.g.: non mbstring support + invalid chars
            return '';
        }
        $length = (int) $lengthTmp;
    }

    \array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return \implode('', $smatches[0]);
}
9912
|
|
|
|
9913
|
|
|
/**
 * Removes a suffix ($needle) from the end of the string ($haystack).
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The substring to search for.</p>
 *
 * @return string
 *                the haystack without the suffix, or the unchanged haystack
 *                if it does not end with the needle
 */
public static function substr_right(string $haystack, string $needle): string
{
    // an empty haystack or an empty needle leaves nothing to remove
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    if (self::str_ends_with($haystack, $needle) !== true) {
        return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keepLength);

    return $head === false ? '' : (string) $head;
}
9941
|
|
|
|
9942
|
|
|
/**
 * Returns a case swapped version of the string.
 *
 * @param string $str       <p>The input string.</p>
 * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return string each character's case swapped
 */
public static function swapCase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
{
    if ($str === '') {
        return '';
    }

    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
        // drop invalid characters before the case variants are built
        $str = self::clean($str);
    }

    $lower = self::strtolower($str, $encoding);
    $upper = self::strtoupper($str, $encoding);

    // byte-wise XOR: where the original equals one case variant, the other one remains
    // NOTE(review): assumes both variants have the same byte length as $str - confirm
    return (string) ($lower ^ $upper ^ $str);
}
9969
|
|
|
|
9970
|
|
|
/**
 * Checks whether "mbstring" or "iconv" functions exist although the matching
 * php-extension is not loaded (presumably because a polyfill provides them).
 *
 * @return bool
 *              <strong>true</strong> if "mb_strlen()" exists without the "mbstring" extension
 *              or "iconv()" exists without the "iconv" extension, <strong>false</strong> otherwise
 */
public static function symfony_polyfill_used(): bool
{
    $mbstringWithoutExtension = \extension_loaded('mbstring') === false && \function_exists('mb_strlen');
    $iconvWithoutExtension = \extension_loaded('iconv') === false && \function_exists('iconv');

    return $mbstringWithoutExtension || $iconvWithoutExtension;
}
9993
|
|
|
|
9994
|
|
|
/**
 * Replaces every tab character with $tabLength spaces.
 *
 * @param string $str       <p>The input string.</p>
 * @param int    $tabLength [optional] <p>Number of spaces per tab. Values below 0 are treated as 0,
 *                          so the tabs are simply removed.</p>
 *
 * @return string
 */
public static function tabs_to_spaces(string $str, int $tabLength = 4): string
{
    // "str_repeat()" rejects a negative multiplier, so clamp it to zero
    $spaces = \str_repeat(' ', \max(0, $tabLength));

    return \str_replace("\t", $spaces, $str);
}
10004
|
|
|
|
10005
|
|
|
/**
 * Converts the first character of each word in the string to uppercase
 * and all other chars to lowercase.
 *
 * The encoding is normalized (unless it is already "UTF-8" or "CP850") and the
 * work is handed over to self::str_titleize().
 *
 * @param string      $str                   <p>The input string.</p>
 * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string string with all characters of $str being title-cased
 */
public static function titlecase(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
{
    $isKnownEncoding = ($encoding === 'UTF-8' || $encoding === 'CP850');
    if ($isKnownEncoding === false) {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return self::str_titleize(
        $str,
        null,
        $encoding,
        $cleanUtf8,
        $lang,
        $tryToKeepStringLength,
        false
    );
}
10025
|
|
|
|
10026
|
|
|
/**
 * Deprecated alias: forwards all arguments unchanged to "UTF8::to_ascii()".
 *
 * @see UTF8::to_ascii()
 *
 * @param string $str
 * @param string $subst_chr
 * @param bool   $strict
 *
 * @return string
 *
 * @deprecated <p>use "UTF8::to_ascii()"</p>
 */
public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
{
    return self::to_ascii(
        $str,
        $subst_chr,
        $strict
    );
}
10043
|
|
|
|
10044
|
|
|
/**
 * Deprecated alias: forwards the argument unchanged to "UTF8::to_iso8859()".
 *
 * @see UTF8::to_iso8859()
 *
 * @param string|string[] $str
 *
 * @return string|string[]
 *
 * @deprecated <p>use "UTF8::to_iso8859()"</p>
 */
public static function toIso8859($str)
{
    $converted = self::to_iso8859($str);

    return $converted;
}
10059
|
|
|
|
10060
|
|
|
/**
 * Deprecated alias: forwards the argument unchanged to "UTF8::to_latin1()".
 *
 * @see UTF8::to_latin1()
 *
 * @param string|string[] $str
 *
 * @return string|string[]
 *
 * @deprecated <p>use "UTF8::to_latin1()"</p>
 */
public static function toLatin1($str)
{
    $converted = self::to_latin1($str);

    return $converted;
}
10075
|
|
|
|
10076
|
|
|
/**
 * Deprecated alias: forwards the argument unchanged to "UTF8::to_utf8()".
 *
 * @see UTF8::to_utf8()
 *
 * @param string|string[] $str
 *
 * @return string|string[]
 *
 * @deprecated <p>use "UTF8::to_utf8()"</p>
 */
public static function toUTF8($str)
{
    $converted = self::to_utf8($str);

    return $converted;
}
10091
|
|
|
|
10092
|
|
|
/** |
10093
|
|
|
* Convert a string into ASCII. |
10094
|
|
|
* |
10095
|
|
|
* @param string $str <p>The input string.</p> |
10096
|
|
|
* @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p> |
10097
|
|
|
* @param bool $strict [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad |
10098
|
|
|
* performance</p> |
10099
|
|
|
* |
10100
|
|
|
* @return string |
10101
|
|
|
*/ |
10102
|
38 |
|
public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string |
10103
|
|
|
{ |
10104
|
38 |
|
static $UTF8_TO_ASCII; |
10105
|
|
|
|
10106
|
38 |
|
if ($str === '') { |
10107
|
3 |
|
return ''; |
10108
|
|
|
} |
10109
|
|
|
|
10110
|
|
|
// check if we only have ASCII, first (better performance) |
10111
|
35 |
|
if (self::is_ascii($str) === true) { |
10112
|
9 |
|
return $str; |
10113
|
|
|
} |
10114
|
|
|
|
10115
|
28 |
|
$str = self::clean( |
10116
|
28 |
|
$str, |
10117
|
28 |
|
true, |
10118
|
28 |
|
true, |
10119
|
28 |
|
true, |
10120
|
28 |
|
false, |
10121
|
28 |
|
true, |
10122
|
28 |
|
true |
10123
|
|
|
); |
10124
|
|
|
|
10125
|
|
|
// check again, if we only have ASCII, now ... |
10126
|
28 |
|
if (self::is_ascii($str) === true) { |
10127
|
10 |
|
return $str; |
10128
|
|
|
} |
10129
|
|
|
|
10130
|
19 |
|
if ($strict === true) { |
10131
|
1 |
|
if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) { |
10132
|
|
|
self::checkForSupport(); |
10133
|
|
|
} |
10134
|
|
|
|
10135
|
1 |
|
if (self::$SUPPORT['intl'] === true) { |
10136
|
|
|
// INFO: https://unicode.org/cldr/utility/character.jsp?a=%E2%84%8C |
10137
|
|
|
/** @noinspection PhpComposerExtensionStubsInspection */ |
10138
|
1 |
|
$str = \transliterator_transliterate('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;', $str); |
10139
|
|
|
|
10140
|
|
|
// check again, if we only have ASCII, now ... |
10141
|
1 |
|
if (self::is_ascii($str) === true) { |
10142
|
1 |
|
return $str; |
10143
|
|
|
} |
10144
|
|
|
} |
10145
|
|
|
} |
10146
|
|
|
|
10147
|
19 |
|
if (self::$ORD === null) { |
10148
|
|
|
self::$ORD = self::getData('ord'); |
10149
|
|
|
} |
10150
|
|
|
|
10151
|
19 |
|
\preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar); |
10152
|
19 |
|
$chars = $ar[0]; |
10153
|
19 |
|
$ord = null; |
10154
|
19 |
|
foreach ($chars as &$c) { |
10155
|
19 |
|
$ordC0 = self::$ORD[$c[0]]; |
10156
|
|
|
|
10157
|
19 |
|
if ($ordC0 >= 0 && $ordC0 <= 127) { |
10158
|
15 |
|
continue; |
10159
|
|
|
} |
10160
|
|
|
|
10161
|
19 |
|
$ordC1 = self::$ORD[$c[1]]; |
10162
|
|
|
|
10163
|
|
|
// ASCII - next please |
10164
|
19 |
|
if ($ordC0 >= 192 && $ordC0 <= 223) { |
10165
|
17 |
|
$ord = ($ordC0 - 192) * 64 + ($ordC1 - 128); |
10166
|
|
|
} |
10167
|
|
|
|
10168
|
19 |
|
if ($ordC0 >= 224) { |
10169
|
8 |
|
$ordC2 = self::$ORD[$c[2]]; |
10170
|
|
|
|
10171
|
8 |
|
if ($ordC0 <= 239) { |
10172
|
7 |
|
$ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128); |
10173
|
|
|
} |
10174
|
|
|
|
10175
|
8 |
|
if ($ordC0 >= 240) { |
10176
|
2 |
|
$ordC3 = self::$ORD[$c[3]]; |
10177
|
|
|
|
10178
|
2 |
|
if ($ordC0 <= 247) { |
10179
|
2 |
|
$ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128); |
10180
|
|
|
} |
10181
|
|
|
|
10182
|
2 |
|
if ($ordC0 >= 248) { |
10183
|
|
|
$ordC4 = self::$ORD[$c[4]]; |
10184
|
|
|
|
10185
|
|
|
if ($ordC0 <= 251) { |
10186
|
|
|
$ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128); |
10187
|
|
|
} |
10188
|
|
|
|
10189
|
|
|
if ($ordC0 >= 252) { |
10190
|
|
|
$ordC5 = self::$ORD[$c[5]]; |
10191
|
|
|
|
10192
|
|
|
if ($ordC0 <= 253) { |
10193
|
|
|
$ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128); |
10194
|
|
|
} |
10195
|
|
|
} |
10196
|
|
|
} |
10197
|
|
|
} |
10198
|
|
|
} |
10199
|
|
|
|
10200
|
19 |
|
if ($ordC0 === 254 || $ordC0 === 255) { |
10201
|
|
|
$c = $unknown; |
10202
|
|
|
|
10203
|
|
|
continue; |
10204
|
|
|
} |
10205
|
|
|
|
10206
|
19 |
|
if ($ord === null) { |
10207
|
|
|
$c = $unknown; |
10208
|
|
|
|
10209
|
|
|
continue; |
10210
|
|
|
} |
10211
|
|
|
|
10212
|
19 |
|
$bank = $ord >> 8; |
10213
|
19 |
|
if (!isset($UTF8_TO_ASCII[$bank])) { |
10214
|
9 |
|
$UTF8_TO_ASCII[$bank] = self::getDataIfExists(\sprintf('x%02x', $bank)); |
10215
|
9 |
|
if ($UTF8_TO_ASCII[$bank] === false) { |
10216
|
2 |
|
$UTF8_TO_ASCII[$bank] = []; |
10217
|
|
|
} |
10218
|
|
|
} |
10219
|
|
|
|
10220
|
19 |
|
$newchar = $ord & 255; |
10221
|
|
|
|
10222
|
19 |
|
if (isset($UTF8_TO_ASCII[$bank][$newchar])) { |
10223
|
|
|
|
10224
|
|
|
// keep for debugging |
10225
|
|
|
/* |
10226
|
|
|
echo "file: " . sprintf('x%02x', $bank) . "\n"; |
10227
|
|
|
echo "char: " . $c . "\n"; |
10228
|
|
|
echo "ord: " . $ord . "\n"; |
10229
|
|
|
echo "newchar: " . $newchar . "\n"; |
10230
|
|
|
echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n"; |
10231
|
|
|
echo "bank:" . $bank . "\n\n"; |
10232
|
|
|
*/ |
10233
|
|
|
|
10234
|
18 |
|
$c = $UTF8_TO_ASCII[$bank][$newchar]; |
10235
|
|
|
} else { |
10236
|
|
|
|
10237
|
|
|
// keep for debugging missing chars |
10238
|
|
|
/* |
10239
|
|
|
echo "file: " . sprintf('x%02x', $bank) . "\n"; |
10240
|
|
|
echo "char: " . $c . "\n"; |
10241
|
|
|
echo "ord: " . $ord . "\n"; |
10242
|
|
|
echo "newchar: " . $newchar . "\n"; |
10243
|
|
|
echo "bank:" . $bank . "\n\n"; |
10244
|
|
|
*/ |
10245
|
|
|
|
10246
|
19 |
|
$c = $unknown; |
10247
|
|
|
} |
10248
|
|
|
} |
10249
|
|
|
|
10250
|
19 |
|
return \implode('', $chars); |
10251
|
|
|
} |
10252
|
|
|
|
10253
|
|
|
/**
 * Interpret a value as a boolean.
 *
 * The value is cast to string first. An empty string is false; the
 * case-insensitive words "true", "1", "on", "yes" are true and "false", "0",
 * "off", "no" are false (see http://php.net/manual/en/filter.filters.validate.php).
 * Any other numeric string is true when it is greater than zero, and every
 * remaining value is true when something is left after UTF8::trim().
 *
 * @param mixed $str <p>The value to interpret.</p>
 *
 * @return bool
 */
public static function to_boolean($str): bool
{
    $value = (string) $str;
    if ($value === '') {
        return false;
    }

    $normalized = \strtolower($value);

    if (\in_array($normalized, ['true', '1', 'on', 'yes'], true)) {
        return true;
    }

    if (\in_array($normalized, ['false', '0', 'off', 'no'], true)) {
        return false;
    }

    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if (\is_numeric($value)) {
        return (float) $value > 0;
    }

    // anything else counts as "true" as long as it is not only trimmable characters
    return (bool) self::trim($value);
}
10292
|
|
|
|
10293
|
|
|
/**
 * Convert given string to safe filename (and keep string case).
 *
 * Steps: optionally transliterate, remove every character that is not a
 * letter, digit, dot, hyphen, whitespace or part of $fallback_char, turn
 * whitespace runs into $fallback_char, collapse repeated fallback characters
 * and finally trim $fallback_char from both ends.
 *
 * @param string $string           <p>The input string.</p>
 * @param bool   $use_transliterate <p>No transliteration, conversion etc. is done by default - unsafe
 *                                  characters are simply removed.</p>
 * @param string $fallback_char    <p>Replacement for whitespace; may be empty, in which case whitespace
 *                                  is removed.</p>
 *
 * @return string
 */
public static function to_filename(string $string, bool $use_transliterate = false, string $fallback_char = '-'): string
{
    if ($use_transliterate === true) {
        $string = self::str_transliterate($string, $fallback_char);
    }

    $fallback_char_escaped = \preg_quote($fallback_char, '/');

    $patterns = [
        '/[^' . $fallback_char_escaped . '\.\-a-zA-Z0-9\s]/', // 1) remove un-needed chars
        '/[\s]+/',                                            // 2) convert spaces to $fallback_char
    ];
    $replacements = [
        '',
        $fallback_char,
    ];

    // An empty fallback would produce the invalid pattern "/[]+/", which makes
    // preg_replace() fail for the whole call, so step 3 is only added when
    // there is something to collapse.
    if ($fallback_char !== '') {
        $patterns[] = '/[' . $fallback_char_escaped . ']+/'; // 3) remove double $fallback_char's
        $replacements[] = $fallback_char;
    }

    $string = (string) \preg_replace($patterns, $replacements, $string);

    // trim "$fallback_char" from beginning and end of the string
    return \trim($string, $fallback_char);
}
10328
|
|
|
|
10329
|
|
|
/**
 * Convert a string into "ISO-8859"-encoding (Latin-1).
 *
 * Arrays are converted element by element (recursively, keys are kept);
 * every other value is cast to string and passed to UTF8::utf8_decode().
 *
 * @param string|string[] $str
 *
 * @return string|string[]
 */
public static function to_iso8859($str)
{
    if (\is_array($str) === true) {
        return \array_map(
            static function ($item) {
                return self::to_iso8859($item);
            },
            $str
        );
    }

    $text = (string) $str;

    return $text === '' ? '' : self::utf8_decode($text);
}
10353
|
|
|
|
10354
|
|
|
/**
 * alias for "UTF8::to_iso8859()"
 *
 * @see UTF8::to_iso8859()
 *
 * @param string|string[] $str <p>A string or an array of strings.</p>
 *
 * @return string|string[] the result of UTF8::to_iso8859()
 */
public static function to_latin1($str)
{
    $converted = self::to_iso8859($str);

    return $converted;
}
10367
|
|
|
|
10368
|
|
|
/**
 * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
 *
 * <ul>
 * <li>It decodes unicode escape sequences ("\uXXXX", including escaped UTF-16 surrogate pairs)
 * and, on request, html-entities.</li>
 * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
 * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
 * case.</li>
 * </ul>
 *
 * @param string|string[] $str                    <p>Any string or array.</p>
 * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
 *
 * @return string|string[] the UTF-8 encoded string
 */
public static function to_utf8($str, bool $decodeHtmlEntityToUtf8 = false)
{
    if (\is_array($str) === true) {
        foreach ($str as $k => $v) {
            $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
        }

        return $str;
    }

    $str = (string) $str;
    if ($str === '') {
        return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    $max = self::strlen_in_byte($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
        $c1 = $str[$i];

        // 0x00-0x7F: plain ASCII, it doesn't need conversion
        if ($c1 < "\x80") {
            $buf .= $c1;

            continue;
        }

        // 0x80-0xBF: a continuation byte without a lead byte, needs conversion
        if ($c1 < "\xC0") {
            $buf .= self::to_utf8_convert_helper($c1);

            continue;
        }

        // 0xC0 and above: the lead byte tells how long the UTF-8 sequence should be
        if ($c1 <= "\xDF") {
            $sequenceLength = 2;
        } elseif ($c1 <= "\xEF") {
            $sequenceLength = 3;
        } elseif ($c1 <= "\xF7") {
            $sequenceLength = 4;
        } else {
            $sequenceLength = 0; // doesn't look like UTF8, but should be converted
        }

        // every following byte of the sequence must be a continuation byte (0x80-0xBF);
        // a position past the end of the string counts as "\x00"
        $sequence = $c1;
        $isUtf8 = $sequenceLength !== 0;
        for ($offset = 1; $isUtf8 && $offset < $sequenceLength; $offset++) {
            $next = $i + $offset >= $max ? "\x00" : $str[$i + $offset];

            if ($next >= "\x80" && $next <= "\xBF") {
                $sequence .= $next;
            } else {
                $isUtf8 = false;
            }
        }

        if ($isUtf8) { // yeah, almost sure it's UTF8 already
            $buf .= $sequence;
            $i += $sequenceLength - 1;
        } else { // not valid UTF8 - convert only the lead byte
            $buf .= self::to_utf8_convert_helper($c1);
        }
    }

    // decode unicode escape sequences; a high surrogate directly followed by a
    // low surrogate is decoded together as UTF-16, otherwise characters outside
    // the BMP would be corrupted
    $decoded = \preg_replace_callback(
        '/\\\\u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})|\\\\u([0-9a-f]{4})/i',
        static function ($match) {
            // always fallback via symfony polyfill
            if (isset($match[3])) {
                return \mb_convert_encoding(\pack('H*', $match[3]), 'UTF-8', 'UCS-2BE');
            }

            return \mb_convert_encoding(\pack('H*', $match[1] . $match[2]), 'UTF-8', 'UTF-16BE');
        },
        $buf
    );

    // preg_replace_callback() returns null on a PCRE error; keep the un-decoded buffer then
    if ($decoded !== null) {
        $buf = $decoded;
    }

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
        $buf = self::html_entity_decode($buf);
    }

    return $buf;
}
10472
|
|
|
|
10473
|
|
|
/**
 * Convert a single non-UTF-8 byte into a UTF-8 sequence.
 *
 * The byte is looked up in the Windows-1252 table first; when it is not
 * listed there, a two byte sequence is assembled from the byte value.
 *
 * @param int|string $input <p>The byte to convert (used as key of the "ord" table).</p>
 *
 * @return string
 */
private static function to_utf8_convert_helper($input): string
{
    // lazy-load the lookup tables
    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }

    if (self::$WIN1252_TO_UTF8 === null) {
        self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
    }

    $ordC1 = self::$ORD[$input];

    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
        return (string) self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Integer shift instead of "/ 64": the division yields a float, and using a
    // float with a fractional part as array offset is deprecated since PHP 8.1.
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";
    $cc2 = ((string) $input & "\x3F") | "\x80";

    return $cc1 . $cc2;
}
10506
|
|
|
|
10507
|
|
|
/**
 * Strip whitespace or other characters from beginning or end of a UTF-8 string.
 *
 * INFO: This is slower than "trim()"
 *
 * We can only use the original-function, if we use <= 7-Bit in the string / chars
 * but the check for ASCII (7-Bit) costs more time than we can save here.
 *
 * Without a character list (INF, null, false, an empty string or an empty
 * array) the Unicode categories "Z" (separators) and "C" (control and other
 * invisible characters) are stripped. A character list of "0" is a real list
 * and strips zeros.
 *
 * @param string $str   <p>The string to be trimmed</p>
 * @param mixed  $chars [optional] <p>Optional characters to be stripped</p>
 *
 * @return string the trimmed string
 */
public static function trim(string $str = '', $chars = \INF): string
{
    if ($str === '') {
        return '';
    }

    // explicit checks instead of "!$chars": the string "0" is falsy but is a valid character list
    $useDefaultChars = $chars === \INF
                       || $chars === null
                       || $chars === false
                       || $chars === ''
                       || $chars === [];

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
        $pattern = '^[\pZ\pC]+|[\pZ\pC]+$';
    } else {
        $quotedChars = \preg_quote((string) $chars, '/');
        $pattern = '^[' . $quotedChars . ']+|[' . $quotedChars . ']+$';
    }

    return self::regex_replace($str, $pattern, '', '', '/');
}
10536
|
|
|
|
10537
|
|
|
/**
 * Makes string's first char uppercase.
 *
 * The string is split into its first character and the remainder; only the
 * first character is passed to UTF8::strtoupper().
 *
 * @param string      $str                   <p>The input string.</p>
 * @param string      $encoding              [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $cleanUtf8             [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                  [optional] <p>Set the language for special cases: az, el, lt, tr</p>
 * @param bool        $tryToKeepStringLength [optional] <p>true === try to keep the string length: e.g. ẞ -> ß</p>
 *
 * @return string the resulting string
 */
public static function ucfirst(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false, string $lang = null, bool $tryToKeepStringLength = false): string
{
    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    // everything after the first character stays untouched
    $remainder = self::substr($str, 1, null, $encoding);

    $upperCasedFirstChar = self::strtoupper(
        (string) self::substr($str, 0, 1, $encoding),
        $encoding,
        $cleanUtf8,
        $lang,
        $tryToKeepStringLength
    );

    return $upperCasedFirstChar . ($remainder === false ? '' : $remainder);
}
10571
|
|
|
|
10572
|
|
|
/**
 * alias for "UTF8::ucfirst()"
 *
 * @see UTF8::ucfirst()
 *
 * @param string $str       <p>The input string.</p>
 * @param string $encoding  [optional] <p>Set the charset.</p>
 * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return string the result of UTF8::ucfirst()
 */
public static function ucword(string $str, string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
{
    $result = self::ucfirst($str, $encoding, $cleanUtf8);

    return $result;
}
10587
|
|
|
|
10588
|
|
|
/**
 * Uppercase for all words in the string.
 *
 * Pure ASCII input without exceptions and without a charlist is handled by
 * PHP's native ucwords(); everything else is split via UTF8::str_to_words()
 * and upper-cased word by word via UTF8::ucfirst().
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
 * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
 *                             word.</p>
 * @param string   $encoding   [optional] <p>Set the charset.</p>
 * @param bool     $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @return string
 */
public static function ucwords(string $str, array $exceptions = [], string $charlist = '', string $encoding = 'UTF-8', bool $cleanUtf8 = false): string
{
    // compare against '' explicitly: the string "0" is falsy but must be kept
    if ($str === '') {
        return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $str = self::clean($str);
    }

    // the native function is only usable when neither a charlist nor exceptions
    // were given (a charlist of "0" counts as given)
    $usePhpDefaultFunctions = ($charlist . \implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
        return \ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = \count($exceptions) > 0;
    $newWords = [];

    foreach ($words as $word) {
        // skip empty chunks only; a chunk that is exactly "0" is a real word
        if ((string) $word === '') {
            continue;
        }

        if (
            $useExceptions === false
            ||
            !\in_array($word, $exceptions, true)
        ) {
            $word = self::ucfirst($word, $encoding);
        }

        $newWords[] = $word;
    }

    return \implode('', $newWords);
}
10656
|
|
|
|
10657
|
|
|
/**
 * Multi decode html entity & fix urlencoded-win1252-chars.
 *
 * e.g:
 * 'test+test'                     => 'test test'
 * 'D%FCsseldorf'                  => 'Düsseldorf'
 * 'D&#xFC;sseldorf'               => 'Düsseldorf'
 * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
 * 'D%C3%BCsseldorf'               => 'Düsseldorf'
 * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
 * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
 *
 * @param string $str          <p>The input string.</p>
 * @param bool   $multi_decode <p>Decode as often as possible.</p>
 *
 * @return string
 */
public static function urldecode(string $str, bool $multi_decode = true): string
{
    if ($str === '') {
        return '';
    }

    // "%uXXXX" escapes are rewritten as hexadecimal html-entities,
    // the decode loop below resolves them afterwards
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (\preg_match($unicodeEscapePattern, $str)) {
        $str = (string) \preg_replace($unicodeEscapePattern, '&#x\\1;', \urldecode($str));
    }

    $entityFlags = \ENT_QUOTES | \ENT_HTML5;

    // one pass = to_utf8 -> html_entity_decode -> urldecode -> fix_simple_utf8;
    // with $multi_decode the pass is repeated until the string stops changing
    do {
        $previous = $str;

        $asUtf8 = self::to_utf8($str);
        $withoutEntities = self::html_entity_decode($asUtf8, $entityFlags);
        $str = self::fix_simple_utf8(\urldecode($withoutEntities));
    } while ($multi_decode === true && $previous !== $str);

    return $str;
}
10704
|
|
|
|
10705
|
|
|
/** |
10706
|
|
|
* Return an array with "urlencoded"-win1252 -> UTF-8 |
10707
|
|
|
* |
10708
|
|
|
* @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p> |
10709
|
|
|
* |
10710
|
|
|
* @return string[] |
10711
|
|
|
*/ |
10712
|
2 |
|
public static function urldecode_fix_win1252_chars(): array |
10713
|
|
|
{ |
10714
|
|
|
return [ |
10715
|
2 |
|
'%20' => ' ', |
10716
|
|
|
'%21' => '!', |
10717
|
|
|
'%22' => '"', |
10718
|
|
|
'%23' => '#', |
10719
|
|
|
'%24' => '$', |
10720
|
|
|
'%25' => '%', |
10721
|
|
|
'%26' => '&', |
10722
|
|
|
'%27' => "'", |
10723
|
|
|
'%28' => '(', |
10724
|
|
|
'%29' => ')', |
10725
|
|
|
'%2A' => '*', |
10726
|
|
|
'%2B' => '+', |
10727
|
|
|
'%2C' => ',', |
10728
|
|
|
'%2D' => '-', |
10729
|
|
|
'%2E' => '.', |
10730
|
|
|
'%2F' => '/', |
10731
|
|
|
'%30' => '0', |
10732
|
|
|
'%31' => '1', |
10733
|
|
|
'%32' => '2', |
10734
|
|
|
'%33' => '3', |
10735
|
|
|
'%34' => '4', |
10736
|
|
|
'%35' => '5', |
10737
|
|
|
'%36' => '6', |
10738
|
|
|
'%37' => '7', |
10739
|
|
|
'%38' => '8', |
10740
|
|
|
'%39' => '9', |
10741
|
|
|
'%3A' => ':', |
10742
|
|
|
'%3B' => ';', |
10743
|
|
|
'%3C' => '<', |
10744
|
|
|
'%3D' => '=', |
10745
|
|
|
'%3E' => '>', |
10746
|
|
|
'%3F' => '?', |
10747
|
|
|
'%40' => '@', |
10748
|
|
|
'%41' => 'A', |
10749
|
|
|
'%42' => 'B', |
10750
|
|
|
'%43' => 'C', |
10751
|
|
|
'%44' => 'D', |
10752
|
|
|
'%45' => 'E', |
10753
|
|
|
'%46' => 'F', |
10754
|
|
|
'%47' => 'G', |
10755
|
|
|
'%48' => 'H', |
10756
|
|
|
'%49' => 'I', |
10757
|
|
|
'%4A' => 'J', |
10758
|
|
|
'%4B' => 'K', |
10759
|
|
|
'%4C' => 'L', |
10760
|
|
|
'%4D' => 'M', |
10761
|
|
|
'%4E' => 'N', |
10762
|
|
|
'%4F' => 'O', |
10763
|
|
|
'%50' => 'P', |
10764
|
|
|
'%51' => 'Q', |
10765
|
|
|
'%52' => 'R', |
10766
|
|
|
'%53' => 'S', |
10767
|
|
|
'%54' => 'T', |
10768
|
|
|
'%55' => 'U', |
10769
|
|
|
'%56' => 'V', |
10770
|
|
|
'%57' => 'W', |
10771
|
|
|
'%58' => 'X', |
10772
|
|
|
'%59' => 'Y', |
10773
|
|
|
'%5A' => 'Z', |
10774
|
|
|
'%5B' => '[', |
10775
|
|
|
'%5C' => '\\', |
10776
|
|
|
'%5D' => ']', |
10777
|
|
|
'%5E' => '^', |
10778
|
|
|
'%5F' => '_', |
10779
|
|
|
'%60' => '`', |
10780
|
|
|
'%61' => 'a', |
10781
|
|
|
'%62' => 'b', |
10782
|
|
|
'%63' => 'c', |
10783
|
|
|
'%64' => 'd', |
10784
|
|
|
'%65' => 'e', |
10785
|
|
|
'%66' => 'f', |
10786
|
|
|
'%67' => 'g', |
10787
|
|
|
'%68' => 'h', |
10788
|
|
|
'%69' => 'i', |
10789
|
|
|
'%6A' => 'j', |
10790
|
|
|
'%6B' => 'k', |
10791
|
|
|
'%6C' => 'l', |
10792
|
|
|
'%6D' => 'm', |
10793
|
|
|
'%6E' => 'n', |
10794
|
|
|
'%6F' => 'o', |
10795
|
|
|
'%70' => 'p', |
10796
|
|
|
'%71' => 'q', |
10797
|
|
|
'%72' => 'r', |
10798
|
|
|
'%73' => 's', |
10799
|
|
|
'%74' => 't', |
10800
|
|
|
'%75' => 'u', |
10801
|
|
|
'%76' => 'v', |
10802
|
|
|
'%77' => 'w', |
10803
|
|
|
'%78' => 'x', |
10804
|
|
|
'%79' => 'y', |
10805
|
|
|
'%7A' => 'z', |
10806
|
|
|
'%7B' => '{', |
10807
|
|
|
'%7C' => '|', |
10808
|
|
|
'%7D' => '}', |
10809
|
|
|
'%7E' => '~', |
10810
|
|
|
'%7F' => '', |
10811
|
|
|
'%80' => '`', |
10812
|
|
|
'%81' => '', |
10813
|
|
|
'%82' => '‚', |
10814
|
|
|
'%83' => 'ƒ', |
10815
|
|
|
'%84' => '„', |
10816
|
|
|
'%85' => '…', |
10817
|
|
|
'%86' => '†', |
10818
|
|
|
'%87' => '‡', |
10819
|
|
|
'%88' => 'ˆ', |
10820
|
|
|
'%89' => '‰', |
10821
|
|
|
'%8A' => 'Š', |
10822
|
|
|
'%8B' => '‹', |
10823
|
|
|
'%8C' => 'Œ', |
10824
|
|
|
'%8D' => '', |
10825
|
|
|
'%8E' => 'Ž', |
10826
|
|
|
'%8F' => '', |
10827
|
|
|
'%90' => '', |
10828
|
|
|
'%91' => '‘', |
10829
|
|
|
'%92' => '’', |
10830
|
|
|
'%93' => '“', |
10831
|
|
|
'%94' => '”', |
10832
|
|
|
'%95' => '•', |
10833
|
|
|
'%96' => '–', |
10834
|
|
|
'%97' => '—', |
10835
|
|
|
'%98' => '˜', |
10836
|
|
|
'%99' => '™', |
10837
|
|
|
'%9A' => 'š', |
10838
|
|
|
'%9B' => '›', |
10839
|
|
|
'%9C' => 'œ', |
10840
|
|
|
'%9D' => '', |
10841
|
|
|
'%9E' => 'ž', |
10842
|
|
|
'%9F' => 'Ÿ', |
10843
|
|
|
'%A0' => '', |
10844
|
|
|
'%A1' => '¡', |
10845
|
|
|
'%A2' => '¢', |
10846
|
|
|
'%A3' => '£', |
10847
|
|
|
'%A4' => '¤', |
10848
|
|
|
'%A5' => '¥', |
10849
|
|
|
'%A6' => '¦', |
10850
|
|
|
'%A7' => '§', |
10851
|
|
|
'%A8' => '¨', |
10852
|
|
|
'%A9' => '©', |
10853
|
|
|
'%AA' => 'ª', |
10854
|
|
|
'%AB' => '«', |
10855
|
|
|
'%AC' => '¬', |
10856
|
|
|
'%AD' => '', |
10857
|
|
|
'%AE' => '®', |
10858
|
|
|
'%AF' => '¯', |
10859
|
|
|
'%B0' => '°', |
10860
|
|
|
'%B1' => '±', |
10861
|
|
|
'%B2' => '²', |
10862
|
|
|
'%B3' => '³', |
10863
|
|
|
'%B4' => '´', |
10864
|
|
|
'%B5' => 'µ', |
10865
|
|
|
'%B6' => '¶', |
10866
|
|
|
'%B7' => '·', |
10867
|
|
|
'%B8' => '¸', |
10868
|
|
|
'%B9' => '¹', |
10869
|
|
|
'%BA' => 'º', |
10870
|
|
|
'%BB' => '»', |
10871
|
|
|
'%BC' => '¼', |
10872
|
|
|
'%BD' => '½', |
10873
|
|
|
'%BE' => '¾', |
10874
|
|
|
'%BF' => '¿', |
10875
|
|
|
'%C0' => 'À', |
10876
|
|
|
'%C1' => 'Á', |
10877
|
|
|
'%C2' => 'Â', |
10878
|
|
|
'%C3' => 'Ã', |
10879
|
|
|
'%C4' => 'Ä', |
10880
|
|
|
'%C5' => 'Å', |
10881
|
|
|
'%C6' => 'Æ', |
10882
|
|
|
'%C7' => 'Ç', |
10883
|
|
|
'%C8' => 'È', |
10884
|
|
|
'%C9' => 'É', |
10885
|
|
|
'%CA' => 'Ê', |
10886
|
|
|
'%CB' => 'Ë', |
10887
|
|
|
'%CC' => 'Ì', |
10888
|
|
|
'%CD' => 'Í', |
10889
|
|
|
'%CE' => 'Î', |
10890
|
|
|
'%CF' => 'Ï', |
10891
|
|
|
'%D0' => 'Ð', |
10892
|
|
|
'%D1' => 'Ñ', |
10893
|
|
|
'%D2' => 'Ò', |
10894
|
|
|
'%D3' => 'Ó', |
10895
|
|
|
'%D4' => 'Ô', |
10896
|
|
|
'%D5' => 'Õ', |
10897
|
|
|
'%D6' => 'Ö', |
10898
|
|
|
'%D7' => '×', |
10899
|
|
|
'%D8' => 'Ø', |
10900
|
|
|
'%D9' => 'Ù', |
10901
|
|
|
'%DA' => 'Ú', |
10902
|
|
|
'%DB' => 'Û', |
10903
|
|
|
'%DC' => 'Ü', |
10904
|
|
|
'%DD' => 'Ý', |
10905
|
|
|
'%DE' => 'Þ', |
10906
|
|
|
'%DF' => 'ß', |
10907
|
|
|
'%E0' => 'à', |
10908
|
|
|
'%E1' => 'á', |
10909
|
|
|
'%E2' => 'â', |
10910
|
|
|
'%E3' => 'ã', |
10911
|
|
|
'%E4' => 'ä', |
10912
|
|
|
'%E5' => 'å', |
10913
|
|
|
'%E6' => 'æ', |
10914
|
|
|
'%E7' => 'ç', |
10915
|
|
|
'%E8' => 'è', |
10916
|
|
|
'%E9' => 'é', |
10917
|
|
|
'%EA' => 'ê', |
10918
|
|
|
'%EB' => 'ë', |
10919
|
|
|
'%EC' => 'ì', |
10920
|
|
|
'%ED' => 'í', |
10921
|
|
|
'%EE' => 'î', |
10922
|
|
|
'%EF' => 'ï', |
10923
|
|
|
'%F0' => 'ð', |
10924
|
|
|
'%F1' => 'ñ', |
10925
|
|
|
'%F2' => 'ò', |
10926
|
|
|
'%F3' => 'ó', |
10927
|
|
|
'%F4' => 'ô', |
10928
|
|
|
'%F5' => 'õ', |
10929
|
|
|
'%F6' => 'ö', |
10930
|
|
|
'%F7' => '÷', |
10931
|
|
|
'%F8' => 'ø', |
10932
|
|
|
'%F9' => 'ù', |
10933
|
|
|
'%FA' => 'ú', |
10934
|
|
|
'%FB' => 'û', |
10935
|
|
|
'%FC' => 'ü', |
10936
|
|
|
'%FD' => 'ý', |
10937
|
|
|
'%FE' => 'þ', |
10938
|
|
|
'%FF' => 'ÿ', |
10939
|
|
|
]; |
10940
|
|
|
} |
10941
|
|
|
|
10942
|
|
|
/**
 * Decodes an UTF-8 string to ISO-8859-1.
 *
 * Two byte sequences are folded into one byte when the resulting code point
 * is below 256; every other multi byte sequence becomes "?".
 *
 * @param string $str           <p>The input string.</p>
 * @param bool   $keepUtf8Chars <p>Return the string as it was before the byte-wise decoding when the
 *                              decoded result is not shorter (measured with UTF8::strlen()).</p>
 *
 * @return string
 */
public static function utf8_decode(string $str, bool $keepUtf8Chars = false): string
{
    if ($str === '') {
        return '';
    }

    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    if ($SEARCH_CACHE === null) {
        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $SEARCH_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
        $REPLACE_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): the search list is the KEYS of the win1252 table. The helper
    // to_utf8_convert_helper() indexes the same table by byte ordinal, so the keys
    // are presumably integers that str_replace() treats as decimal strings -
    // confirm against the data file that this replacement does what is intended.
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
    }

    // save for later comparison
    $str_backup = $str;
    $byteLength = self::strlen_in_byte($str);

    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }

    $unmappable = '?';

    // The string is rewritten in place: $readPos walks the input bytes while
    // $writePos (never ahead of $readPos) marks where the next output byte goes.
    $readPos = 0;
    $writePos = 0;
    while ($readPos < $byteLength) {
        $leadBits = $str[$readPos] & "\xF0";

        if ($leadBits === "\xC0" || $leadBits === "\xD0") {
            // two byte sequence: 5 payload bits from the lead byte, 6 from the next byte
            $highBits = self::$ORD[$str[$readPos] & "\x1F"];
            $lowBits = self::$ORD[$str[++$readPos] & "\x3F"];
            $codePoint = ($highBits << 6) | $lowBits;

            $str[$writePos] = $codePoint < 256 ? self::$CHR[$codePoint] : $unmappable;
        } elseif ($leadBits === "\xE0" || $leadBits === "\xF0") {
            // three / four byte sequences have no single byte equivalent: skip the
            // bytes after the lead byte (2 resp. 3) and write the placeholder
            $str[$writePos] = $unmappable;
            $readPos += $leadBits === "\xF0" ? 3 : 2;
        } else {
            $str[$writePos] = $str[$readPos];
        }

        ++$readPos;
        ++$writePos;
    }

    // only the first $writePos bytes hold decoded output
    $decoded = self::substr_in_byte($str, 0, $writePos);
    if ($decoded === false) {
        $decoded = '';
    }

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($decoded) >= self::strlen($str_backup)
    ) {
        return $str_backup;
    }

    return $decoded;
}
11028
|
|
|
|
11029
|
|
|
/**
 * Encodes an ISO-8859-1 string to UTF-8.
 *
 * After the plain ISO-8859-1 conversion, the result is additionally run
 * through a str_replace() built from the "win1252_to_utf8" table, but only
 * when it contains the byte 0xC2.
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string the converted string, or an empty string when the conversion fails
 */
public static function utf8_encode(string $str): string
{
    if ($str === '') {
        return '';
    }

    // "\utf8_encode()" is deprecated since PHP 8.2; this is the documented
    // replacement and produces the same ISO-8859-1 -> UTF-8 mapping
    $str = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

    // the polyfill maybe return false
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    if ($str === false) {
        return '';
    }

    // without a 0xC2 byte the table replacement below is skipped
    if (\strpos($str, "\xC2") === false) {
        return $str;
    }

    static $WIN1252_TO_UTF8_KEYS_CACHE = null;
    static $WIN1252_TO_UTF8_VALUES_CACHE = null;

    if ($WIN1252_TO_UTF8_KEYS_CACHE === null) {
        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $WIN1252_TO_UTF8_KEYS_CACHE = \array_keys(self::$WIN1252_TO_UTF8);
        $WIN1252_TO_UTF8_VALUES_CACHE = \array_values(self::$WIN1252_TO_UTF8);
    }

    // NOTE(review): the search list is the KEYS of the win1252 table. The helper
    // to_utf8_convert_helper() indexes the same table by byte ordinal, so the keys
    // are presumably integers that str_replace() treats as decimal strings -
    // confirm against the data file that this replacement does what is intended.
    return \str_replace($WIN1252_TO_UTF8_KEYS_CACHE, $WIN1252_TO_UTF8_VALUES_CACHE, $str);
}
11068
|
|
|
|
11069
|
|
|
/**
 * fix -> utf8-win1252 chars
 *
 * Thin wrapper that forwards to UTF8::fix_simple_utf8().
 *
 * @param string $str <p>The input string.</p>
 *
 * @return string the result of UTF8::fix_simple_utf8()
 *
 * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
 */
public static function utf8_fix_win1252_chars(string $str): string
{
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
}
11082
|
|
|
|
11083
|
|
|
/**
 * Get the table of all known utf8 whitespace characters.
 *
 * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
 *
 * @author Derek E.
 *
 * @return string[]
 *                  <p>An array with all known whitespace characters as values and the type of
 *                  whitespace as keys, as defined in the URL above.</p>
 */
public static function whitespace_table(): array
{
    $table = self::$WHITESPACE_TABLE;

    return $table;
}
11098
|
|
|
|
11099
|
|
|
/**
 * Truncate a string after a given number of words.
 *
 * <p>If the string contains more than $limit words, the leading $limit words are
 * kept, trailing whitespace is trimmed and $strAddOn is appended. Otherwise the
 * string is returned unchanged.</p>
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $limit    <p>The limit of words as integer.</p>
 * @param string $strAddOn <p>Replacement for the stripped string.</p>
 *
 * @return string <p>An empty string if the input is empty or $limit is lower than 1.</p>
 */
public static function words_limit(string $str, int $limit = 100, string $strAddOn = '…'): string
{
    if ($str === '' || $limit < 1) {
        return '';
    }

    // leading whitespace followed by at most $limit "word + trailing whitespace" groups
    \preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);

    $head = $matches[0] ?? null;

    // no match at all, or the match already spans the whole string: nothing to cut
    if ($head === null || self::strlen($str) === self::strlen($head)) {
        return $str;
    }

    return self::rtrim($head) . $strAddOn;
}
11130
|
|
|
|
11131
|
|
|
/**
 * Wraps a string to a given number of characters.
 *
 * <p>The native "wordwrap()" is run on a mask that holds exactly one ASCII byte
 * per character of the input, so the column width is measured in characters.
 * The positions of the breaks found in the mask are then applied to the
 * original characters.</p>
 *
 * @see http://php.net/manual/en/function.wordwrap.php
 *
 * @param string $str   <p>The input string.</p>
 * @param int    $width [optional] <p>The column width.</p>
 * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
 * @param bool   $cut   [optional] <p>
 *                      If the cut is set to true, the string is
 *                      always wrapped at or before the specified width. So if you have
 *                      a word that is larger than the given width, it is broken apart.
 *                      </p>
 *
 * @return string <p>The given string wrapped at the specified column, or an empty string
 *                if $str or $break is empty.</p>
 */
public static function wordwrap(string $str, int $width = 75, string $break = "\n", bool $cut = false): string
{
    if ($str === '' || $break === '') {
        return '';
    }

    // $chars: the original characters, with every existing $break kept as ONE element.
    // $w:     the mask, one byte per element of $chars:
    //         '#' = existing break, ' ' = space, '?' = any other character.
    $w = '';
    $chars = [];

    // $break is non-empty here, so explode() always returns an array of strings
    foreach (\explode($break, $str) as $segmentIndex => $segment) {
        if ($segmentIndex > 0) {
            $chars[] = $break;
            $w .= '#';
        }

        foreach (self::split($segment) as $char) {
            $chars[] = $char;
            $w .= $char === ' ' ? ' ' : '?';
        }
    }

    $strReturn = '';
    $j = 0; // index of the next unconsumed element of $chars
    $b = $i = -1; // $b: offset of the current '#' in the mask, $i: current offset in the mask
    $w = \wordwrap($w, $width, '#', $cut);

    while (false !== $b = self::strpos($w, '#', $b + 1)) {
        // copy every character that sits in front of this break
        for (++$i; $i < $b; ++$i) {
            $strReturn .= $chars[$j];
            unset($chars[$j++]);
        }

        // the break either already existed or replaced a space: consume that element,
        // a break inserted by "$cut" replaced nothing and consumes nothing
        if ($break === $chars[$j] || $chars[$j] === ' ') {
            unset($chars[$j++]);
        }

        $strReturn .= $break;
    }

    // whatever follows the last break
    return $strReturn . \implode('', $chars);
}
11200
|
|
|
|
11201
|
|
|
/**
 * Line-Wrap the string after $limit, but also after the next word.
 *
 * <p>The input is split at "\r\n", "\r" and "\n", every line is wrapped on its
 * own and the wrapped lines are joined again. Each line, including the last
 * one, is terminated with "\n" in the result.</p>
 *
 * @param string $str   <p>The input string.</p>
 * @param int    $limit <p>The column width, counted in characters.</p>
 *
 * @return string
 */
public static function wordwrap_per_line(string $str, int $limit): string
{
    // preg_split() returns false on failure; the cast turns that into [false]
    $lines = (array) \preg_split('/\\r\\n|\\r|\\n/', $str);

    $wrapped = '';
    foreach ($lines as $line) {
        if ($line === false) {
            continue;
        }

        // use the multi-byte aware wrapper: the native wordwrap() measures the
        // width in bytes and would wrap lines with multi-byte characters too early
        $wrapped .= self::wordwrap($line, $limit) . "\n";
    }

    return $wrapped;
}
11225
|
|
|
|
11226
|
|
|
/**
 * Get the list of Unicode White Space characters.
 *
 * @return string[] <p>An array with numeric code point as key and White Space Character as value.</p>
 */
public static function ws(): array
{
    $whitespace = self::$WHITESPACE;

    return $whitespace;
}
11235
|
|
|
} |
11236
|
|
|
|