1 | <?php |
||||
2 | |||||
3 | declare(strict_types=1); |
||||
4 | |||||
5 | namespace Yiisoft\Strings; |
||||
6 | |||||
7 | use InvalidArgumentException; |
||||
8 | |||||
9 | use function array_map; |
||||
10 | use function array_slice; |
||||
11 | use function base64_decode; |
||||
12 | use function base64_encode; |
||||
13 | use function ceil; |
||||
14 | use function count; |
||||
15 | use function implode; |
||||
16 | use function max; |
||||
17 | use function mb_strlen; |
||||
18 | use function mb_strrpos; |
||||
19 | use function mb_strtolower; |
||||
20 | use function mb_strtoupper; |
||||
21 | use function mb_substr; |
||||
22 | use function preg_match; |
||||
23 | use function preg_quote; |
||||
24 | use function preg_replace; |
||||
25 | use function preg_split; |
||||
26 | use function rtrim; |
||||
27 | use function sprintf; |
||||
28 | use function str_ends_with; |
||||
29 | use function str_repeat; |
||||
30 | use function str_replace; |
||||
31 | use function str_starts_with; |
||||
32 | use function strlen; |
||||
33 | use function strtr; |
||||
34 | use function trim; |
||||
35 | |||||
36 | /** |
||||
37 | * Provides static methods to work with strings. |
||||
38 | */ |
||||
39 | final class StringHelper |
||||
40 | { |
||||
41 | public const DEFAULT_WHITESPACE_PATTERN = "\pC\pZ"; |
||||
42 | |||||
/**
 * Counts the bytes (not the characters) that make up the given string.
 *
 * The measurement is done by {@see mb_strlen()} with the `8bit` encoding, so every byte counts as one unit.
 * A `null` input is measured as an empty string.
 *
 * @param string|null $input The string being measured for length.
 *
 * @return int The number of bytes in the given string.
 */
public static function byteLength(string|null $input): int
{
    $bytes = $input ?? '';

    return mb_strlen($bytes, '8bit');
}
||||
56 | |||||
/**
 * Returns the portion of a string specified by the start and length parameters, both counted in bytes.
 * The string is treated as a byte array by calling `mb_substr()` with the `8bit` encoding.
 *
 * @param string $input The input string.
 * @param int $start The starting position, in bytes.
 * @param int|null $length The desired portion length, in bytes. If not specified or `null`, there will be
 * no limit on length i.e. the output will be until the end of the string.
 *
 * @return string The extracted part of the string. It is an empty string when nothing can be extracted.
 *
 * @see https://www.php.net/manual/en/function.mb-substr.php
 */
public static function byteSubstring(string $input, int $start, int|null $length = null): string
{
    // A `null` length makes `mb_substr()` read up to the end of the string.
    return mb_substr($input, $start, $length, '8bit');
}
||||
74 | |||||
/**
 * Returns the trailing name component of a path.
 *
 * Works like PHP's `basename()`, except that both `\` and `/` are treated as directory separators
 * on every operating system. It was mainly created to work on PHP namespaces; for real file paths
 * PHP's `basename()` should work fine.
 * Note: this method is not aware of the actual filesystem, or path components such as "..".
 *
 * @param string $path A path string.
 * @param string $suffix If the path ends in this suffix, the suffix is cut off before the name is extracted.
 *
 * @return string The trailing name component of the given path.
 *
 * @see https://www.php.net/manual/en/function.basename.php
 */
public static function baseName(string $path, string $suffix = ''): string
{
    $suffixLength = mb_strlen($suffix);
    $endsWithSuffix = $suffixLength > 0 && mb_substr($path, -$suffixLength) === $suffix;

    if ($endsWithSuffix) {
        $path = mb_substr($path, 0, -$suffixLength);
    }

    // Unify the separators and drop trailing ones so the last separator precedes the name.
    $normalized = rtrim(str_replace('\\', '/', $path), '/\\');
    $lastSeparator = mb_strrpos($normalized, '/');

    return $lastSeparator === false
        ? $normalized
        : mb_substr($normalized, $lastSeparator + 1);
}
||||
104 | |||||
/**
 * Returns the parent directory's path.
 *
 * Works like `dirname()`, except that both `\` and `/` are treated as directory separators
 * on every operating system. Everything before the last separator is returned as-is.
 *
 * @param string $path A path string.
 *
 * @return string The parent directory's path, or an empty string when the path contains no separator.
 *
 * @see https://www.php.net/manual/en/function.dirname.php
 */
public static function directoryName(string $path): string
{
    // Separators are unified only to locate the last one; the result is cut from the original path.
    $normalized = str_replace('\\', '/', $path);
    $lastSeparator = mb_strrpos($normalized, '/');

    if ($lastSeparator === false) {
        return '';
    }

    return mb_substr($path, 0, $lastSeparator);
}
||||
125 | |||||
/**
 * Get part of string.
 *
 * @param string $string To get substring from.
 * @param int $start Character to start at.
 * @param int|null $length Number of characters to get. When `null`, everything up to the end of the
 * string is returned.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The extracted part of the string.
 *
 * @see https://php.net/manual/en/function.mb-substr.php
 */
public static function substring(
    string $string,
    int $start,
    int|null $length = null,
    string $encoding = 'UTF-8',
): string {
    return mb_substr($string, $start, $length, $encoding);
}
||||
140 | |||||
/**
 * Replace text within a portion of a string.
 *
 * @param string $string The input string.
 * @param string $replacement The replacement string.
 * @param int $start Position to begin replacing substring at.
 * If start is non-negative, the replacing will begin at the start'th offset into string.
 * If start is negative, the replacing will begin at the start'th character from the end of string.
 * Values outside of the string are clamped to its boundaries.
 * @param int|null $length Length of the substring to be replaced.
 * If given and is positive, it represents the length of the portion of string which is to be replaced.
 * If it is negative, it represents the number of characters from the end of string at which to stop replacing.
 * If it is not given, then it will default to the length of the string; i.e. end the replacing at the end of string.
 * If length is zero then this function will have the effect of inserting replacement into string at the given start offset.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The string with the selected portion replaced.
 */
public static function replaceSubstring(
    string $string,
    string $replacement,
    int $start,
    int|null $length = null,
    string $encoding = 'UTF-8',
): string {
    $total = mb_strlen($string, $encoding);

    // Turn the start into an absolute offset within [0, $total].
    $from = $start < 0
        ? max(0, $total + $start)
        : min($start, $total);

    // Turn the length into a non-negative number of characters to replace.
    if ($length === null) {
        $span = $total;
    } elseif ($length < 0) {
        $span = max(0, $total - $from + $length);
    } else {
        $span = min($length, $total);
    }

    // The replaced portion must not run past the end of the string.
    $span = min($span, $total - $from);

    $head = mb_substr($string, 0, $from, $encoding);
    $tail = mb_substr($string, $from + $span, $total - $from - $span, $encoding);

    return $head . $replacement . $tail;
}
||||
185 | |||||
/**
 * Check if given string starts with specified substring.
 * Binary and multibyte safe.
 *
 * @param string $input Input string.
 * @param string|null $with Part to search for at the beginning of `$input`.
 *
 * @return bool `true` when `$input` starts with `$with` or when `$with` is `null`, `false` otherwise.
 */
public static function startsWith(string $input, string|null $with): bool
{
    if ($with === null) {
        return true;
    }

    return str_starts_with($input, $with);
}
||||
199 | |||||
/**
 * Check if given string starts with specified substring ignoring case.
 * Multibyte safe: both strings are handled as UTF-8.
 *
 * @param string $input Input string.
 * @param string|null $with Part to search for at the beginning of `$input`.
 *
 * @return bool `true` when `$input` starts with `$with` regardless of case, or when `$with` is `null`
 * or empty; `false` otherwise.
 */
public static function startsWithIgnoringCase(string $input, string|null $with): bool
{
    if ($with === null || $with === '') {
        return true;
    }

    // Take the prefix by character count, not by byte count: a byte-based cut may split a multibyte
    // character in two, and case variants of one character may have different byte lengths.
    $prefix = mb_substr($input, 0, mb_strlen($with, 'UTF-8'), 'UTF-8');

    return mb_strtolower($prefix, 'UTF-8') === mb_strtolower($with, 'UTF-8');
}
||||
220 | |||||
/**
 * Check if given string ends with specified substring.
 * Binary and multibyte safe.
 *
 * @param string $input Input string to check.
 * @param string|null $with Part to search for at the end of `$input`.
 *
 * @return bool `true` when `$input` ends with `$with` or when `$with` is `null`, `false` otherwise.
 */
public static function endsWith(string $input, string|null $with): bool
{
    if ($with === null) {
        return true;
    }

    return str_ends_with($input, $with);
}
||||
234 | |||||
/**
 * Check if given string ends with specified substring ignoring case.
 * Multibyte safe: both strings are handled as UTF-8.
 *
 * @param string $input Input string to check.
 * @param string|null $with Part to search for at the end of `$input`.
 *
 * @return bool `true` when `$input` ends with `$with` regardless of case, or when `$with` is `null`
 * or empty; `false` otherwise.
 */
public static function endsWithIgnoringCase(string $input, string|null $with): bool
{
    if ($with === null || $with === '') {
        return true;
    }

    // Take the suffix by character count, not by byte count: a byte-based cut may split a multibyte
    // character in two, and case variants of one character may have different byte lengths.
    $suffix = mb_substr($input, -mb_strlen($with, 'UTF-8'), null, 'UTF-8');

    return mb_strtolower($suffix, 'UTF-8') === mb_strtolower($with, 'UTF-8');
}
||||
255 | |||||
/**
 * Truncates a string from the beginning to the number of characters specified.
 *
 * The end of the string is kept and the trim marker is put in front of it. When `$length` leaves no room
 * for any character of the input, only the trim marker is returned.
 *
 * @param string $input String to process.
 * @param int $length Maximum length of the truncated string including trim marker.
 * @param string $trimMarker String to prepend to the truncated string.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The truncated string, or the input itself when it is not longer than `$length`.
 */
public static function truncateBegin(string $input, int $length, string $trimMarker = '…', string $encoding = 'UTF-8'): string
{
    $inputLength = mb_strlen($input, $encoding);

    if ($inputLength <= $length) {
        return $input;
    }

    // Number of input characters that still fit next to the marker; never negative.
    $keep = max(0, $length - mb_strlen($trimMarker, $encoding));

    // An explicit start offset is used because a start of `-0` would select the whole string.
    return $trimMarker . mb_substr($input, $inputLength - $keep, $keep, $encoding);
}
||||
275 | |||||
/**
 * Truncates a string in the middle. Keeping start and end.
 * `StringHelper::truncateMiddle('Hello world number 2', 8)` produces "Hell…r 2".
 *
 * The characters that fit next to the trim marker are split between the start and the end of the input;
 * the start gets the extra character when the number is odd. When `$length` leaves no room for any
 * character of the input, only the trim marker is returned.
 *
 * @param string $input The string to truncate.
 * @param int $length Maximum length of the truncated string including trim marker.
 * @param string $trimMarker String to put in the middle of truncated string.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The truncated string, or the input itself when it is not longer than `$length`.
 */
public static function truncateMiddle(string $input, int $length, string $trimMarker = '…', string $encoding = 'UTF-8'): string
{
    $inputLength = mb_strlen($input, $encoding);

    if ($inputLength <= $length) {
        return $input;
    }

    // Number of input characters that still fit next to the marker; never negative.
    $available = max(0, $length - mb_strlen($trimMarker, $encoding));
    $headLength = (int)ceil($available / 2);
    $tailLength = $available - $headLength;

    $head = mb_substr($input, 0, $headLength, $encoding);
    // An explicit start offset is used so that a tail length of 0 yields an empty tail.
    $tail = mb_substr($input, $inputLength - $tailLength, $tailLength, $encoding);

    return $head . $trimMarker . $tail;
}
||||
301 | |||||
/**
 * Truncates a string from the end to the number of characters specified.
 *
 * The beginning of the string is kept, whitespace at the end of the kept part is removed and the trim
 * marker is appended. When `$length` leaves no room for any character of the input, only the trim marker
 * is returned.
 *
 * @param string $input The string to truncate.
 * @param int $length Maximum length of the truncated string including trim marker.
 * @param string $trimMarker String to append to the end of truncated string.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The truncated string, or the input itself when it is not longer than `$length`.
 */
public static function truncateEnd(string $input, int $length, string $trimMarker = '…', string $encoding = 'UTF-8'): string
{
    $inputLength = mb_strlen($input, $encoding);

    if ($inputLength <= $length) {
        return $input;
    }

    // Never negative: a negative length would make `mb_substr()` cut characters off the end instead.
    $keep = max(0, $length - mb_strlen($trimMarker, $encoding));

    return rtrim(mb_substr($input, 0, $keep, $encoding)) . $trimMarker;
}
||||
323 | |||||
/**
 * Truncates a string to the number of words specified.
 *
 * Words are sequences of characters separated by whitespace. The whitespace between the kept words is
 * preserved. When `$count` is zero or negative, no word is kept and only the trim marker is returned.
 *
 * @param string $input The string to truncate.
 * @param int $count How many words from original string to include into truncated string.
 * @param string $trimMarker String to append to the end of truncated string.
 *
 * @return string The truncated string, or the input itself when it has no more than `$count` words.
 */
public static function truncateWords(string $input, int $count, string $trimMarker = '…'): string
{
    // The list alternates words and the whitespace between them: word, space, word, space, word...
    /** @psalm-var list<string> $words */
    $words = preg_split('/(\s+)/u', trim($input), -1, PREG_SPLIT_DELIM_CAPTURE);

    if (count($words) / 2 <= $count) {
        return $input;
    }

    // `$count` words are followed by `$count - 1` separators. The value is clamped at 0 because
    // a negative slice length would keep everything except the last elements.
    $keep = max(0, ($count * 2) - 1);

    return implode('', array_slice($words, 0, $keep)) . $trimMarker;
}
||||
344 | |||||
/**
 * Get the length of a string in characters.
 *
 * @param string $string String to calculate length for.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return int The number of characters in the string for the given encoding.
 *
 * @see https://php.net/manual/en/function.mb-strlen.php
 */
public static function length(string $string, string $encoding = 'UTF-8'): int
{
    $characters = mb_strlen($string, $encoding);

    return $characters;
}
||||
357 | |||||
/**
 * Counts words in a string.
 *
 * A word is every non-empty piece left after splitting the input on runs of whitespace.
 *
 * @param string $input The string to count words in.
 *
 * @return int The number of words.
 */
public static function countWords(string $input): int
{
    /** @var array $pieces */
    $pieces = preg_split('/\s+/u', $input, -1, PREG_SPLIT_NO_EMPTY);
    $total = count($pieces);

    return $total;
}
||||
367 | |||||
/**
 * Convert all characters of a string to lowercase.
 *
 * @param string $string String to process.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The lowercased string.
 *
 * @see https://php.net/manual/en/function.mb-strtolower.php
 */
public static function lowercase(string $string, string $encoding = 'UTF-8'): string
{
    $lowercased = mb_strtolower($string, $encoding);

    return $lowercased;
}
||||
380 | |||||
/**
 * Convert all characters of a string to uppercase.
 *
 * @param string $string String to process.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The uppercased string.
 *
 * @see https://php.net/manual/en/function.mb-strtoupper.php
 */
public static function uppercase(string $string, string $encoding = 'UTF-8'): string
{
    $uppercased = mb_strtoupper($string, $encoding);

    return $uppercased;
}
||||
393 | |||||
/**
 * Make a string's first character uppercase, leaving the rest of the string untouched.
 *
 * @param string $string The string to be processed.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The string with its first character uppercased.
 *
 * @see https://php.net/manual/en/function.ucfirst.php
 */
public static function uppercaseFirstCharacter(string $string, string $encoding = 'UTF-8'): string
{
    $head = mb_substr($string, 0, 1, $encoding);
    $tail = mb_substr($string, 1, null, $encoding);

    return mb_strtoupper($head, $encoding) . $tail;
}
||||
409 | |||||
/**
 * Uppercase the first character of each word in a string.
 *
 * The input is split on every whitespace character, empty pieces are dropped, and the resulting words
 * are joined back with a single space.
 *
 * @param string $string The string to be processed.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The words of the string, each starting with an uppercase character.
 *
 * @see https://php.net/manual/en/function.ucwords.php
 */
public static function uppercaseFirstCharacterInEachWord(string $string, string $encoding = 'UTF-8'): string
{
    $capitalized = [];

    foreach (preg_split('/\s/u', $string, -1, PREG_SPLIT_NO_EMPTY) as $word) {
        $capitalized[] = self::uppercaseFirstCharacter($word, $encoding);
    }

    return implode(' ', $capitalized);
}
||||
429 | |||||
/**
 * Encodes string into "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
 *
 * The input is Base 64 encoded and then `+` is replaced with `-` and `/` with `_`.
 *
 * > Note: Base 64 padding `=` may be at the end of the returned string.
 * > `=` is not transparent to URL encoding.
 *
 * @see https://tools.ietf.org/html/rfc4648#page-7
 *
 * @param string $input The string to encode.
 *
 * @return string Encoded string.
 */
public static function base64UrlEncode(string $input): string
{
    $encoded = base64_encode($input);

    return strtr($encoded, ['+' => '-', '/' => '_']);
}
||||
446 | |||||
/**
 * Decodes "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
 *
 * `-` is turned back into `+` and `_` into `/` before the string is Base 64 decoded.
 *
 * @see https://tools.ietf.org/html/rfc4648#page-7
 *
 * @param string $input Encoded string.
 *
 * @return string Decoded string.
 */
public static function base64UrlDecode(string $input): string
{
    $standardAlphabet = strtr($input, ['-' => '+', '_' => '/']);

    return base64_decode($standardAlphabet);
}
||||
460 | |||||
/**
 * Split a string to array with non-empty lines.
 * Whitespace from the beginning and end of each line will be stripped.
 *
 * Both the outer trimming and the splitting run in UTF-8 mode, so whitespace is recognized the same way
 * at the edges of the input and around separators.
 *
 * @param string $string The input string.
 * @param string $separator The boundary string. It is a part of regular expression
 * so should be taken into account or properly escaped with {@see preg_quote()}.
 *
 * @return array The non-empty parts of the string.
 */
public static function split(string $string, string $separator = '\R'): array
{
    // The `u` modifier keeps this trimming consistent with the splitting pattern below.
    $trimmed = preg_replace('~^\s+|\s+$~u', '', $string);

    return preg_split('~\s*' . $separator . '\s*~u', $trimmed, -1, PREG_SPLIT_NO_EMPTY);
}
||||
474 | |||||
/**
 * Splits a path string into its keys using a delimiter that can be escaped.
 *
 * @param string $path The path to split. Keys are separated by the delimiter. If a key contains
 * the delimiter, it can be escaped with the escape character or a custom delimiter can be used.
 * @param string $delimiter A separator of keys, exactly one byte long. Defaults to "." (dot).
 * @param string $escapeCharacter An escape character, used to escape delimiter, exactly one byte long.
 * Defaults to "\" (backslash).
 * @param bool $preserveDelimiterEscaping Whether to keep the escaping in the keys of the final array.
 * When `false`, an escaped escape character becomes a single escape character and an escaped delimiter
 * becomes a plain delimiter. Defaults to `false`.
 *
 * @throws InvalidArgumentException When the delimiter or the escape character is not exactly one byte long,
 * or when both are the same.
 *
 * @return string[] The keys of the path. An empty path gives an empty array.
 *
 * @psalm-return list<string>
 */
public static function parsePath(
    string $path,
    string $delimiter = '.',
    string $escapeCharacter = '\\',
    bool $preserveDelimiterEscaping = false
): array {
    if (strlen($delimiter) !== 1) {
        throw new InvalidArgumentException('Only 1 character is allowed for delimiter.');
    }

    if (strlen($escapeCharacter) !== 1) {
        throw new InvalidArgumentException('Only 1 escape character is allowed.');
    }

    if ($delimiter === $escapeCharacter) {
        throw new InvalidArgumentException('Delimiter and escape character must be different.');
    }

    if ($path === '') {
        return [];
    }

    $doubledEscape = $escapeCharacter . $escapeCharacter;

    // Without any delimiter the whole path is a single key.
    if (!str_contains($path, $delimiter)) {
        return $preserveDelimiterEscaping
            ? [$path]
            : [str_replace($doubledEscape, $escapeCharacter, $path)];
    }

    // Split on delimiters that are preceded by an even number (including zero) of escape characters.
    $pattern = sprintf(
        '/(?<!%1$s)((?>%1$s%1$s)*)%2$s/',
        preg_quote($escapeCharacter, '/'),
        preg_quote($delimiter, '/')
    );

    /** @psalm-var non-empty-list<array{0:string, 1:int}> $chunks */
    $chunks = preg_split($pattern, $path, -1, PREG_SPLIT_OFFSET_CAPTURE);

    $lastIndex = count($chunks) - 1;
    $keys = [];

    for ($index = 0; $index < $lastIndex; $index++) {
        [$text, $offset] = $chunks[$index];

        // Escape characters right before the delimiter were consumed by the pattern. Their number is
        // the gap between this chunk's end and the next chunk's start, minus the delimiter itself.
        $consumedEscapes = $chunks[$index + 1][1] - $offset - strlen($text) - 1;

        $keys[] = $text . ($consumedEscapes > 0 ? str_repeat($escapeCharacter, $consumedEscapes) : '');
    }

    $keys[] = $chunks[$lastIndex][0];

    if ($preserveDelimiterEscaping) {
        return $keys;
    }

    $escaped = [$doubledEscape, $escapeCharacter . $delimiter];
    $unescaped = [$escapeCharacter, $delimiter];

    return array_map(
        static fn (string $key): string => str_replace($escaped, $unescaped, $key),
        $keys
    );
}
||||
558 | |||||
/**
 * Strip the characters matched by `$pattern` from the beginning and end of a string.
 * By default these are the characters of the Unicode categories `\pC` and `\pZ`.
 * Input string and pattern are treated as UTF-8.
 *
 * @see https://en.wikipedia.org/wiki/Whitespace_character#Unicode
 * @see https://www.php.net/manual/function.preg-replace
 *
 * @param string|string[] $string The string or an array with strings.
 * @param string $pattern Content of a PCRE character class listing what to strip, as UTF-8 string.
 * Use {@see preg_quote()} to quote `$pattern` if it contains special regular expression characters.
 *
 * @throws InvalidArgumentException When the pattern is not a valid UTF-8 string.
 *
 * @psalm-template TKey of array-key
 * @psalm-param string|array<TKey, string> $string
 * @psalm-param non-empty-string $pattern
 * @psalm-return ($string is array ? array<TKey, string> : string)
 *
 * @return string|string[]
 */
public static function trim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
{
    self::ensureUtf8Pattern($pattern);

    // `D` makes `$` match only at the very end of the subject, not before a final line break.
    $bothEnds = "#^[{$pattern}]+|[{$pattern}]+$#uD";

    return preg_replace($bothEnds, '', $string);
}
||||
583 | |||||
/**
 * Strip the characters matched by `$pattern` from the beginning of a string.
 * By default these are the characters of the Unicode categories `\pC` and `\pZ`.
 *
 * @see self::trim()
 *
 * @param string|string[] $string The string or an array with strings.
 * @param string $pattern Content of a PCRE character class listing what to strip, as UTF-8 string.
 * Use {@see preg_quote()} to quote `$pattern` if it contains special regular expression characters.
 *
 * @throws InvalidArgumentException When the pattern is not a valid UTF-8 string.
 *
 * @psalm-template TKey of array-key
 * @psalm-param string|array<TKey, string> $string
 * @psalm-param non-empty-string $pattern
 * @psalm-return ($string is array ? array<TKey, string> : string)
 *
 * @return string|string[]
 */
public static function ltrim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
{
    self::ensureUtf8Pattern($pattern);

    $leading = "#^[{$pattern}]+#u";

    return preg_replace($leading, '', $string);
}
||||
606 | |||||
/**
 * Strip the characters matched by `$pattern` from the end of a string.
 * By default these are the characters of the Unicode categories `\pC` and `\pZ`.
 *
 * @see self::trim()
 *
 * @param string|string[] $string The string or an array with strings.
 * @param string $pattern Content of a PCRE character class listing what to strip, as UTF-8 string.
 * Use {@see preg_quote()} to quote `$pattern` if it contains special regular expression characters.
 *
 * @throws InvalidArgumentException When the pattern is not a valid UTF-8 string.
 *
 * @psalm-template TKey of array-key
 * @psalm-param string|array<TKey, string> $string
 * @psalm-param non-empty-string $pattern
 * @psalm-return ($string is array ? array<TKey, string> : string)
 *
 * @return string|string[]
 */
public static function rtrim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
{
    self::ensureUtf8Pattern($pattern);

    // `D` makes `$` match only at the very end of the subject, not before a final line break.
    $trailing = "#[{$pattern}]+$#uD";

    return preg_replace($trailing, '', $string);
}
||||
629 | |||||
/**
 * Returns the portion of the string that lies between the first occurrence of the `$start` string
 * and the last occurrence of the `$end` string after that.
 *
 * @param string $string The input string.
 * @param string $start The string marking the start of the portion to extract.
 * @param string|null $end The string marking the end of the portion to extract.
 * If the `$end` string is not provided, it defaults to the value of the `$start` string.
 *
 * @return string|null The portion of the string between the first occurrence of
 * `$start` and the last occurrence of `$end`, or null if either `$start` or `$end` cannot be found.
 */
public static function findBetween(string $string, string $start, ?string $end = null): ?string
{
    $end ??= $start;

    $from = mb_strpos($string, $start);
    if ($from === false) {
        return null;
    }

    // The extracted portion begins right after the start marker.
    $from += mb_strlen($start);

    $to = mb_strrpos($string, $end, $from);

    return $to === false
        ? null
        : mb_substr($string, $from, $to - $from);
}
||||
662 | |||||
/**
 * Returns the portion of the string between the first occurrence of the `$start` string
 * and the next occurrence of the `$end` string after it.
 *
 * @param string $string The input string.
 * @param string $start The string marking the beginning of the segment to extract.
 * @param string|null $end The string marking the termination of the segment.
 * If the `$end` string is not provided, it defaults to the value of the `$start` string.
 *
 * @return string|null Extracted segment, or null if `$start` or `$end` is not present.
 */
public static function findBetweenFirst(string $string, string $start, ?string $end = null): ?string
{
    $end ??= $start;

    $from = mb_strpos($string, $start);
    if ($from === false) {
        return null;
    }

    // The extracted segment begins right after the start marker.
    $from += mb_strlen($start);

    $to = mb_strpos($string, $end, $from);

    return $to === false
        ? null
        : mb_substr($string, $from, $to - $from);
}
||||
694 | |||||
/**
 * Returns the portion of the string between the last occurrence of the `$end` string
 * and the closest occurrence of the `$start` string before it.
 *
 * @param string $string The input string.
 * @param string $start The string marking the beginning of the segment to extract.
 * @param string|null $end The string marking the termination of the segment.
 * If the `$end` string is not provided, it defaults to the value of the `$start` string.
 *
 * @return string|null Extracted segment, or null if `$end` is not present or `$start` is not present
 * before it.
 */
public static function findBetweenLast(string $string, string $start, ?string $end = null): ?string
{
    $end ??= $start;

    $to = mb_strrpos($string, $end);
    if ($to === false) {
        return null;
    }

    // The start marker is looked for only in the part that precedes the end marker.
    $beforeEnd = mb_substr($string, 0, $to);
    $from = mb_strrpos($beforeEnd, $start);
    if ($from === false) {
        return null;
    }

    $from += mb_strlen($start);

    return mb_substr($string, $from, $to - $from);
}
||||
727 | |||||
/**
 * Ensure the given pattern is a valid UTF-8 string.
 *
 * The check matches an empty pattern in UTF-8 mode against the string, which does not succeed
 * for a string that is not valid UTF-8.
 *
 * @param string $pattern The string to check.
 *
 * @throws InvalidArgumentException When the string is not valid UTF-8.
 */
private static function ensureUtf8Pattern(string $pattern): void
{
    $isValidUtf8 = preg_match('##u', $pattern) === 1;

    if ($isValidUtf8) {
        return;
    }

    throw new InvalidArgumentException('Pattern is not a valid UTF-8 string.');
}
||||
741 | } |
||||
742 |