1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Yiisoft\Strings; |
||
6 | |||
7 | use InvalidArgumentException; |
||
8 | |||
9 | use function array_map; |
||
10 | use function array_slice; |
||
11 | use function base64_decode; |
||
12 | use function base64_encode; |
||
13 | use function ceil; |
||
14 | use function count; |
||
15 | use function implode; |
||
16 | use function max; |
||
17 | use function mb_strlen; |
||
18 | use function mb_strrpos; |
||
19 | use function mb_strtolower; |
||
20 | use function mb_strtoupper; |
||
21 | use function mb_substr; |
||
22 | use function preg_match; |
||
23 | use function preg_quote; |
||
24 | use function preg_replace; |
||
25 | use function preg_split; |
||
26 | use function rtrim; |
||
27 | use function sprintf; |
||
28 | use function str_ends_with; |
||
29 | use function str_repeat; |
||
30 | use function str_replace; |
||
31 | use function str_starts_with; |
||
32 | use function strlen; |
||
33 | use function strtr; |
||
34 | use function trim; |
||
35 | |||
/**
 * Provides static methods to work with strings.
 */
final class StringHelper
{
    /**
     * Default character-class body used by {@see StringHelper::trim()}, {@see StringHelper::ltrim()} and
     * {@see StringHelper::rtrim()}: the Unicode general categories "Other" (`\pC`) and "Separator" (`\pZ`).
     */
    public const DEFAULT_WHITESPACE_PATTERN = "\pC\pZ";

    /**
     * Returns the number of bytes in the given string.
     * The string is measured with {@see mb_strlen()} in "8bit" encoding, so it is always treated as a byte array.
     *
     * @param string|null $input The string being measured for length. `null` is treated as an empty string.
     *
     * @return int The number of bytes in the given string.
     */
    public static function byteLength(string|null $input): int
    {
        return mb_strlen((string) $input, '8bit');
    }

    /**
     * Returns the portion of string specified by the start and length parameters.
     * The string is sliced with {@see mb_substr()} in "8bit" encoding, so offsets and lengths are in bytes.
     *
     * @param string $input The input string.
     * @param int $start The starting position (in bytes).
     * @param int|null $length The desired portion length (in bytes). If not specified or `null`, there will be
     * no limit on length i.e. the output will be until the end of the string.
     *
     * @return string The extracted part of string. May be an empty string.
     *
     * @see https://www.php.net/manual/en/function.substr.php
     */
    public static function byteSubstring(string $input, int $start, int|null $length = null): string
    {
        return mb_substr($input, $start, $length ?? mb_strlen($input, '8bit'), '8bit');
    }

    /**
     * Returns the trailing name component of a path.
     * This method is similar to the php function `basename()` except that it will
     * treat both \ and / as directory separators, independent of the operating system.
     * This method was mainly created to work on php namespaces. When working with real
     * file paths, PHP's `basename()` should work fine for you.
     * Note: this method is not aware of the actual filesystem, or path components such as "..".
     *
     * @param string $path A path string.
     * @param string $suffix If the name component ends in suffix this will also be cut off.
     *
     * @return string The trailing name component of the given path.
     *
     * @see https://www.php.net/manual/en/function.basename.php
     */
    public static function baseName(string $path, string $suffix = ''): string
    {
        $length = mb_strlen($suffix);
        if ($length > 0 && mb_substr($path, -$length) === $suffix) {
            $path = mb_substr($path, 0, -$length);
        }

        // Normalize separators and drop trailing ones so that the last component is never empty.
        $path = rtrim(str_replace('\\', '/', $path), '/\\');
        $position = mb_strrpos($path, '/');
        if ($position !== false) {
            return mb_substr($path, $position + 1);
        }

        return $path;
    }

    /**
     * Returns parent directory's path.
     * This method is similar to `dirname()` except that it will treat
     * both \ and / as directory separators, independent of the operating system.
     *
     * @param string $path A path string.
     *
     * @return string The parent directory's path, or an empty string if the path contains no separator.
     *
     * @see https://www.php.net/manual/en/function.dirname.php
     */
    public static function directoryName(string $path): string
    {
        // Separators are normalized only to locate the last one; the returned prefix keeps the original characters.
        $position = mb_strrpos(str_replace('\\', '/', $path), '/');
        if ($position !== false) {
            return mb_substr($path, 0, $position);
        }

        return '';
    }

    /**
     * Get part of string.
     *
     * @param string $string To get substring from.
     * @param int $start Character to start at.
     * @param int|null $length Number of characters to get. `null` means "until the end of the string".
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The extracted substring.
     *
     * @see https://php.net/manual/en/function.mb-substr.php
     */
    public static function substring(
        string $string,
        int $start,
        int|null $length = null,
        string $encoding = 'UTF-8'
    ): string {
        return mb_substr($string, $start, $length, $encoding);
    }

    /**
     * Replace text within a portion of a string.
     *
     * @param string $string The input string.
     * @param string $replacement The replacement string.
     * @param int $start Position to begin replacing substring at.
     * If start is non-negative, the replacing will begin at the start'th offset into string.
     * If start is negative, the replacing will begin at the start'th character from the end of string.
     * @param int|null $length Length of the substring to be replaced.
     * If given and is positive, it represents the length of the portion of string which is to be replaced.
     * If it is negative, it represents the number of characters from the end of string at which to stop replacing.
     * If it is not given, then it will default to the length of the string; i.e. end the replacing at the end of
     * string.
     * If length is zero then this function will have the effect of inserting replacement into string at the given
     * start offset.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The string with the selected portion replaced.
     */
    public static function replaceSubstring(
        string $string,
        string $replacement,
        int $start,
        int|null $length = null,
        string $encoding = 'UTF-8',
    ): string {
        $stringLength = mb_strlen($string, $encoding);

        // Convert a negative or out-of-range start into an offset within [0, $stringLength].
        if ($start < 0) {
            $start = max(0, $stringLength + $start);
        } elseif ($start > $stringLength) {
            $start = $stringLength;
        }

        // Convert a negative ("stop N characters before the end") or missing length into a non-negative count.
        if ($length !== null && $length < 0) {
            $length = max(0, $stringLength - $start + $length);
        } elseif ($length === null || $length > $stringLength) {
            $length = $stringLength;
        }

        // Never replace past the end of the string.
        if (($start + $length) > $stringLength) {
            $length = $stringLength - $start;
        }

        return mb_substr($string, 0, $start, $encoding)
            . $replacement
            . mb_substr($string, $start + $length, $stringLength - $start - $length, $encoding);
    }

    /**
     * Check if given string starts with specified substring.
     * Binary and multibyte safe.
     *
     * @param string $input Input string.
     * @param string|null $with Part to search at the beginning of `$input`. `null` always matches.
     *
     * @return bool Returns true if first input starts with second input, false otherwise.
     */
    public static function startsWith(string $input, string|null $with): bool
    {
        return $with === null || str_starts_with($input, $with);
    }

    /**
     * Check if given string starts with specified substring ignoring case.
     * Both strings are treated as UTF-8.
     *
     * @param string $input Input string.
     * @param string|null $with Part to search at the beginning of `$input`. `null` and an empty string always
     * match.
     *
     * @return bool Returns true if first input starts with second input, false otherwise.
     */
    public static function startsWithIgnoringCase(string $input, string|null $with): bool
    {
        if ($with === null || $with === '') {
            return true;
        }

        // Slice by characters, not bytes: upper and lower case variants of the same character may have
        // different byte lengths in UTF-8, so a byte-sized slice could cut the prefix at the wrong place.
        $prefix = self::substring($input, 0, self::length($with));

        return self::lowercase($prefix) === self::lowercase($with);
    }

    /**
     * Check if given string ends with specified substring.
     * Binary and multibyte safe.
     *
     * @param string $input Input string to check.
     * @param string|null $with Part to search at the end of `$input`. `null` always matches.
     *
     * @return bool Returns true if first input ends with second input, false otherwise.
     */
    public static function endsWith(string $input, string|null $with): bool
    {
        return $with === null || str_ends_with($input, $with);
    }

    /**
     * Check if given string ends with specified substring ignoring case.
     * Both strings are treated as UTF-8.
     *
     * @param string $input Input string to check.
     * @param string|null $with Part to search at the end of `$input`. `null` and an empty string always match.
     *
     * @return bool Returns true if first input ends with second input, false otherwise.
     */
    public static function endsWithIgnoringCase(string $input, string|null $with): bool
    {
        if ($with === null || $with === '') {
            return true;
        }

        // Slice by characters, not bytes (see startsWithIgnoringCase()). The length is non-zero here, so the
        // negative offset always addresses the tail of the string.
        $suffix = self::substring($input, -self::length($with));

        return self::lowercase($suffix) === self::lowercase($with);
    }

    /**
     * Truncates a string from the beginning to the number of characters specified.
     *
     * @param string $input String to process.
     * @param int $length Maximum length of the truncated string including trim marker.
     * @param string $trimMarker String to prepend to the truncated string.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The truncated string. If `$length` is not greater than the marker length, only the marker
     * is returned.
     */
    public static function truncateBegin(
        string $input,
        int $length,
        string $trimMarker = '…',
        string $encoding = 'UTF-8'
    ): string {
        $inputLength = mb_strlen($input, $encoding);

        if ($inputLength <= $length) {
            return $input;
        }

        // Number of trailing characters that still fit next to the marker; never negative.
        $keep = max(0, $length - mb_strlen($trimMarker, $encoding));

        return $trimMarker . mb_substr($input, $inputLength - $keep, null, $encoding);
    }

    /**
     * Truncates a string in the middle. Keeping start and end.
     * `StringHelper::truncateMiddle('Hello world number 2', 8)` produces "Hell…r 2".
     *
     * @param string $input The string to truncate.
     * @param int $length Maximum length of the truncated string including trim marker.
     * @param string $trimMarker String to insert in the middle of truncated string.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The truncated string. If `$length` is not greater than the marker length, only the marker
     * is returned.
     */
    public static function truncateMiddle(
        string $input,
        int $length,
        string $trimMarker = '…',
        string $encoding = 'UTF-8'
    ): string {
        $inputLength = mb_strlen($input, $encoding);

        if ($inputLength <= $length) {
            return $input;
        }

        // Characters of the original string that fit around the marker; the head gets the extra one when odd.
        $available = max(0, $length - mb_strlen($trimMarker, $encoding));
        $headLength = (int) ceil($available / 2);
        $tailLength = $available - $headLength;

        $head = mb_substr($input, 0, $headLength, $encoding);
        // A zero tail must be handled explicitly: an offset of -0 would select the whole string.
        $tail = $tailLength > 0 ? mb_substr($input, -$tailLength, null, $encoding) : '';

        return $head . $trimMarker . $tail;
    }

    /**
     * Truncates a string from the end to the number of characters specified.
     * Trailing whitespace of the kept part is removed before the marker is appended.
     *
     * @param string $input The string to truncate.
     * @param int $length Maximum length of the truncated string including trim marker.
     * @param string $trimMarker String to append to the end of truncated string.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The truncated string. If `$length` is not greater than the marker length, only the marker
     * is returned.
     */
    public static function truncateEnd(
        string $input,
        int $length,
        string $trimMarker = '…',
        string $encoding = 'UTF-8'
    ): string {
        $inputLength = mb_strlen($input, $encoding);

        if ($inputLength <= $length) {
            return $input;
        }

        // Clamp at zero: a negative length would make mb_substr() cut from the end instead of keeping nothing.
        $keep = max(0, $length - mb_strlen($trimMarker, $encoding));

        return rtrim(mb_substr($input, 0, $keep, $encoding)) . $trimMarker;
    }

    /**
     * Truncates a string to the number of words specified.
     *
     * @param string $input The string to truncate.
     * @param int $count How many words from original string to include into truncated string.
     * @param string $trimMarker String to append to the end of truncated string.
     *
     * @return string The truncated string, or `$input` unchanged when it has no more than `$count` words.
     */
    public static function truncateWords(string $input, int $count, string $trimMarker = '…'): string
    {
        // The delimiters are captured too, so the list alternates: word, whitespace, word, ...
        /** @psalm-var list<string> $words */
        $words = preg_split('/(\s+)/u', trim($input), -1, PREG_SPLIT_DELIM_CAPTURE);
        if (count($words) / 2 > $count) {
            // N words occupy 2N - 1 list items. Clamp at zero: a negative length would make array_slice()
            // drop items from the end instead of returning nothing.
            $words = array_slice($words, 0, max(0, ($count * 2) - 1));
            return implode('', $words) . $trimMarker;
        }

        return $input;
    }

    /**
     * Get string length.
     *
     * @param string $string String to calculate length for.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return int The number of characters in the string.
     *
     * @see https://php.net/manual/en/function.mb-strlen.php
     */
    public static function length(string $string, string $encoding = 'UTF-8'): int
    {
        return mb_strlen($string, $encoding);
    }

    /**
     * Counts words in a string. Words are sequences of characters separated by whitespace.
     *
     * @param string $input The string to count words in.
     *
     * @return int The number of words.
     */
    public static function countWords(string $input): int
    {
        /** @var array $words */
        $words = preg_split('/\s+/u', $input, -1, PREG_SPLIT_NO_EMPTY);
        return count($words);
    }

    /**
     * Make a string lowercase.
     *
     * @param string $string String to process.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The lowercased string.
     *
     * @see https://php.net/manual/en/function.mb-strtolower.php
     */
    public static function lowercase(string $string, string $encoding = 'UTF-8'): string
    {
        return mb_strtolower($string, $encoding);
    }

    /**
     * Make a string uppercase.
     *
     * @param string $string String to process.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The uppercased string.
     *
     * @see https://php.net/manual/en/function.mb-strtoupper.php
     */
    public static function uppercase(string $string, string $encoding = 'UTF-8'): string
    {
        return mb_strtoupper($string, $encoding);
    }

    /**
     * Make a string's first character uppercase.
     *
     * @param string $string The string to be processed.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The string with its first character uppercased.
     *
     * @see https://php.net/manual/en/function.ucfirst.php
     */
    public static function uppercaseFirstCharacter(string $string, string $encoding = 'UTF-8'): string
    {
        $firstCharacter = self::substring($string, 0, 1, $encoding);
        $rest = self::substring($string, 1, null, $encoding);

        return self::uppercase($firstCharacter, $encoding) . $rest;
    }

    /**
     * Uppercase the first character of each word in a string.
     * Note that words are split on whitespace and re-joined with a single space, so leading, trailing and
     * repeated whitespace is not preserved.
     *
     * @param string $string The string to be processed.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The processed string.
     *
     * @see https://php.net/manual/en/function.ucwords.php
     */
    public static function uppercaseFirstCharacterInEachWord(string $string, string $encoding = 'UTF-8'): string
    {
        $words = preg_split('/\s/u', $string, -1, PREG_SPLIT_NO_EMPTY);

        $wordsWithUppercaseFirstCharacter = array_map(
            static fn (string $word): string => self::uppercaseFirstCharacter($word, $encoding),
            $words
        );

        return implode(' ', $wordsWithUppercaseFirstCharacter);
    }

    /**
     * Encodes string into "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
     *
     * > Note: Base 64 padding `=` may be at the end of the returned string.
     * > `=` is not transparent to URL encoding.
     *
     * @see https://tools.ietf.org/html/rfc4648#page-7
     *
     * @param string $input The string to encode.
     *
     * @return string Encoded string.
     */
    public static function base64UrlEncode(string $input): string
    {
        return strtr(base64_encode($input), '+/', '-_');
    }

    /**
     * Decodes "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
     *
     * @see https://tools.ietf.org/html/rfc4648#page-7
     *
     * @param string $input Encoded string.
     *
     * @return string Decoded string.
     */
    public static function base64UrlDecode(string $input): string
    {
        return base64_decode(strtr($input, '-_', '+/'));
    }

    /**
     * Split a string to array with non-empty lines.
     * Whitespace from the beginning and end of each line will be stripped.
     *
     * @param string $string The input string.
     * @param string $separator The boundary string. It is a part of regular expression
     * so should be taken into account or properly escaped with {@see preg_quote()}.
     *
     * @return array The non-empty parts of the string.
     */
    public static function split(string $string, string $separator = '\R'): array
    {
        // Strip leading and trailing whitespace of the whole string first.
        $string = preg_replace('(^\s*|\s*$)', '', $string);
        // Whitespace around each separator is consumed as part of the boundary.
        return preg_split('~\s*' . $separator . '\s*~u', $string, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Splits a path string into its keys using a one-character delimiter that can be escaped.
     * For example, with default arguments `'key1.key2\.x.key3'` produces `['key1', 'key2.x', 'key3']`.
     *
     * @param string $path The path to parse. Keys are separated by the delimiter. If a key contains the
     * delimiter, it can be escaped with the escape character or a custom delimiter can be used.
     * @param string $delimiter A separator between keys, exactly one byte long. Defaults to "." (dot).
     * @param string $escapeCharacter An escape character, used to escape delimiter, exactly one byte long.
     * Defaults to "\" (backslash).
     * @param bool $preserveDelimiterEscaping Whether to keep escape sequences in the returned keys as they were
     * written in `$path`. When `false`, a doubled escape character is replaced with a single one and an escaped
     * delimiter is replaced with the delimiter itself. Defaults to `false`.
     *
     * @throws InvalidArgumentException If the delimiter or the escape character is not exactly one byte long, or
     * if they are equal.
     *
     * @return string[]
     *
     * @psalm-return list<string>
     */
    public static function parsePath(
        string $path,
        string $delimiter = '.',
        string $escapeCharacter = '\\',
        bool $preserveDelimiterEscaping = false
    ): array {
        if (strlen($delimiter) !== 1) {
            throw new InvalidArgumentException('Only 1 character is allowed for delimiter.');
        }

        if (strlen($escapeCharacter) !== 1) {
            throw new InvalidArgumentException('Only 1 escape character is allowed.');
        }

        if ($delimiter === $escapeCharacter) {
            throw new InvalidArgumentException('Delimiter and escape character must be different.');
        }

        if ($path === '') {
            return [];
        }

        // Fast path: nothing to split, only escape sequences may need unescaping.
        if (!str_contains($path, $delimiter)) {
            if ($preserveDelimiterEscaping) {
                return [$path];
            }

            return [str_replace($escapeCharacter . $escapeCharacter, $escapeCharacter, $path)];
        }

        // Split on delimiters preceded by an even number (including zero) of escape characters, i.e. on
        // delimiters that are not escaped themselves. The run of escape characters is part of the match, so it
        // is removed from the pieces and has to be restored below using the captured offsets.
        /** @psalm-var non-empty-list<array{0:string, 1:int}> $matches */
        $matches = preg_split(
            sprintf(
                '/(?<!%1$s)((?>%1$s%1$s)*)%2$s/',
                preg_quote($escapeCharacter, '/'),
                preg_quote($delimiter, '/')
            ),
            $path,
            -1,
            PREG_SPLIT_OFFSET_CAPTURE
        );
        $result = [];
        $countResults = count($matches);
        for ($i = 1; $i < $countResults; $i++) {
            // Gap between the end of the previous piece and the start of this one, minus the delimiter itself,
            // equals the number of escape characters swallowed by the separator match.
            $l = $matches[$i][1] - $matches[$i - 1][1] - strlen($matches[$i - 1][0]) - 1;
            $result[] = $matches[$i - 1][0] . ($l > 0 ? str_repeat($escapeCharacter, $l) : '');
        }
        $result[] = $matches[$countResults - 1][0];

        if ($preserveDelimiterEscaping === true) {
            return $result;
        }

        return array_map(
            static fn (string $key): string => str_replace(
                [
                    $escapeCharacter . $escapeCharacter,
                    $escapeCharacter . $delimiter,
                ],
                [
                    $escapeCharacter,
                    $delimiter,
                ],
                $key
            ),
            $result
        );
    }

    /**
     * Strip characters matching the pattern from the beginning and end of a string.
     * By default these are characters of Unicode categories "Other" and "Separator"
     * ({@see StringHelper::DEFAULT_WHITESPACE_PATTERN}). Input string and pattern are treated as UTF-8.
     *
     * @see https://en.wikipedia.org/wiki/Whitespace_character#Unicode
     * @see https://www.php.net/manual/function.preg-replace
     *
     * @param string|string[] $string The string or an array with strings.
     * @param string $pattern PCRE regex pattern to search for, as UTF-8 string. It is placed inside a character
     * class (`[...]`). Use {@see preg_quote()} to quote `$pattern` if it contains special regular expression
     * characters.
     *
     * @psalm-template TKey of array-key
     * @psalm-param string|array<TKey, string> $string
     * @psalm-param non-empty-string $pattern
     * @psalm-return ($string is array ? array<TKey, string> : string)
     *
     * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
     *
     * @return string|string[]
     */
    public static function trim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
    {
        self::ensureUtf8Pattern($pattern);

        // "D" makes "$" match only at the very end of the subject, not before a final newline.
        return preg_replace("#^[$pattern]+|[$pattern]+$#uD", '', $string);
    }

    /**
     * Strip characters matching the pattern from the beginning of a string.
     * By default these are characters of Unicode categories "Other" and "Separator"
     * ({@see StringHelper::DEFAULT_WHITESPACE_PATTERN}).
     *
     * @see self::trim()
     *
     * @param string|string[] $string The string or an array with strings.
     * @param string $pattern PCRE regex pattern to search for, as UTF-8 string. It is placed inside a character
     * class (`[...]`). Use {@see preg_quote()} to quote `$pattern` if it contains special regular expression
     * characters.
     *
     * @psalm-template TKey of array-key
     * @psalm-param string|array<TKey, string> $string
     * @psalm-param non-empty-string $pattern
     * @psalm-return ($string is array ? array<TKey, string> : string)
     *
     * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
     *
     * @return string|string[]
     */
    public static function ltrim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
    {
        self::ensureUtf8Pattern($pattern);

        return preg_replace("#^[$pattern]+#u", '', $string);
    }

    /**
     * Strip characters matching the pattern from the end of a string.
     * By default these are characters of Unicode categories "Other" and "Separator"
     * ({@see StringHelper::DEFAULT_WHITESPACE_PATTERN}).
     *
     * @see self::trim()
     *
     * @param string|string[] $string The string or an array with strings.
     * @param string $pattern PCRE regex pattern to search for, as UTF-8 string. It is placed inside a character
     * class (`[...]`). Use {@see preg_quote()} to quote `$pattern` if it contains special regular expression
     * characters.
     *
     * @psalm-template TKey of array-key
     * @psalm-param string|array<TKey, string> $string
     * @psalm-param non-empty-string $pattern
     * @psalm-return ($string is array ? array<TKey, string> : string)
     *
     * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
     *
     * @return string|string[]
     */
    public static function rtrim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
    {
        self::ensureUtf8Pattern($pattern);

        // "D" makes "$" match only at the very end of the subject, not before a final newline.
        return preg_replace("#[$pattern]+$#uD", '', $string);
    }

    /**
     * Returns the portion of the string that lies between the first occurrence of the `$start` string
     * and the last occurrence of the `$end` string after that.
     *
     * @param string $string The input string.
     * @param string $start The string marking the start of the portion to extract.
     * @param string|null $end The string marking the end of the portion to extract.
     * If the `$end` string is not provided, it defaults to the value of the `$start` string.
     *
     * @return string|null The portion of the string between the first occurrence of
     * `$start` and the last occurrence of `$end`, or null if either `$start` or `$end` cannot be found.
     */
    public static function findBetween(string $string, string $start, ?string $end = null): ?string
    {
        if ($end === null) {
            $end = $start;
        }

        $startPos = mb_strpos($string, $start);

        if ($startPos === false) {
            return null;
        }

        // Move past the start marker and look for the last end marker from there on.
        $startPos += mb_strlen($start);
        $endPos = mb_strrpos($string, $end, $startPos);

        if ($endPos === false) {
            return null;
        }

        return mb_substr($string, $startPos, $endPos - $startPos);
    }

    /**
     * Returns the portion of the string between the initial occurrence of the '$start' string
     * and the next occurrence of the '$end' string.
     *
     * @param string $string The input string.
     * @param string $start The string marking the beginning of the segment to extract.
     * @param string|null $end The string marking the termination of the segment.
     * If the '$end' string is not provided, it defaults to the value of the '$start' string.
     *
     * @return string|null Extracted segment, or null if '$start' or '$end' is not present.
     */
    public static function findBetweenFirst(string $string, string $start, ?string $end = null): ?string
    {
        if ($end === null) {
            $end = $start;
        }

        $startPos = mb_strpos($string, $start);

        if ($startPos === false) {
            return null;
        }

        // Move past the start marker and look for the nearest end marker from there on.
        $startPos += mb_strlen($start);
        $endPos = mb_strpos($string, $end, $startPos);

        if ($endPos === false) {
            return null;
        }

        return mb_substr($string, $startPos, $endPos - $startPos);
    }

    /**
     * Returns the portion of the string between the latest '$start' string
     * and the subsequent '$end' string.
     *
     * @param string $string The input string.
     * @param string $start The string marking the beginning of the segment to extract.
     * @param string|null $end The string marking the termination of the segment.
     * If the '$end' string is not provided, it defaults to the value of the '$start' string.
     *
     * @return string|null Extracted segment, or null if '$start' or '$end' is not present.
     */
    public static function findBetweenLast(string $string, string $start, ?string $end = null): ?string
    {
        if ($end === null) {
            $end = $start;
        }

        $endPos = mb_strrpos($string, $end);

        if ($endPos === false) {
            return null;
        }

        // Search for the start marker only in the part that precedes the last end marker.
        $startPos = mb_strrpos(mb_substr($string, 0, $endPos), $start);

        if ($startPos === false) {
            return null;
        }

        $startPos += mb_strlen($start);

        return mb_substr($string, $startPos, $endPos - $startPos);
    }

    /**
     * Ensure the input string is a valid UTF-8 string.
     *
     * @param string $pattern The input string.
     *
     * @throws InvalidArgumentException If the string is not valid UTF-8.
     */
    private static function ensureUtf8Pattern(string $pattern): void
    {
        // An empty pattern with the "u" modifier matches any valid UTF-8 subject and fails on an invalid one.
        if (!preg_match('##u', $pattern)) {
            throw new InvalidArgumentException('Pattern is not a valid UTF-8 string.');
        }
    }
}
||
742 |