1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Yiisoft\Strings; |
6
|
|
|
|
7
|
|
|
use InvalidArgumentException; |
8
|
|
|
|
9
|
|
|
use function array_map; |
10
|
|
|
use function array_slice; |
11
|
|
|
use function base64_decode; |
12
|
|
|
use function base64_encode; |
13
|
|
|
use function ceil; |
14
|
|
|
use function count; |
15
|
|
|
use function implode; |
16
|
|
|
use function max; |
17
|
|
|
use function mb_strlen; |
18
|
|
|
use function mb_strrpos; |
19
|
|
|
use function mb_strtolower; |
20
|
|
|
use function mb_strtoupper; |
21
|
|
|
use function mb_substr; |
22
|
|
|
use function preg_match; |
23
|
|
|
use function preg_quote; |
24
|
|
|
use function preg_replace; |
25
|
|
|
use function preg_split; |
26
|
|
|
use function rtrim; |
27
|
|
|
use function sprintf; |
28
|
|
|
use function str_ends_with; |
29
|
|
|
use function str_repeat; |
30
|
|
|
use function str_replace; |
31
|
|
|
use function str_starts_with; |
32
|
|
|
use function strlen; |
33
|
|
|
use function strtr; |
34
|
|
|
use function trim; |
35
|
|
|
|
36
|
|
|
/**
 * Provides static methods to work with strings.
 *
 * Unless stated otherwise, methods operate on characters (not bytes) and assume UTF-8.
 */
final class StringHelper
{
    /**
     * Default character class body used by {@see trim()}, {@see ltrim()} and {@see rtrim()}:
     * Unicode "Other" (`\pC`, includes control characters) and "Separator" (`\pZ`) categories.
     */
    public const DEFAULT_WHITESPACE_PATTERN = "\pC\pZ";

    /**
     * Returns the number of bytes in the given string.
     * This method ensures the string is treated as a byte array even if `mbstring.func_overload` is turned on
     * by using {@see mb_strlen()}.
     *
     * @param string|null $input The string being measured for length. `null` is treated as an empty string.
     *
     * @return int The number of bytes in the given string.
     */
    public static function byteLength(string|null $input): int
    {
        return mb_strlen((string)$input, '8bit');
    }

    /**
     * Returns the portion of string specified by the start and length parameters.
     * This method ensures the string is treated as a byte array by using `mb_substr()`.
     *
     * @param string $input The input string.
     * @param int $start The starting position, in bytes.
     * @param int|null $length The desired portion length, in bytes. If not specified or `null`, there will be
     * no limit on length i.e. the output will be until the end of the string.
     *
     * @return string The extracted part of string. May be an empty string.
     *
     * @see https://www.php.net/manual/en/function.substr.php
     */
    public static function byteSubstring(string $input, int $start, int|null $length = null): string
    {
        return mb_substr($input, $start, $length ?? mb_strlen($input, '8bit'), '8bit');
    }

    /**
     * Returns the trailing name component of a path.
     * This method is similar to the php function `basename()` except that it will
     * treat both \ and / as directory separators, independent of the operating system.
     * This method was mainly created to work on php namespaces. When working with real
     * file paths, PHP's `basename()` should work fine for you.
     * Note: this method is not aware of the actual filesystem, or path components such as "..".
     *
     * @param string $path A path string.
     * @param string $suffix If the name component ends in suffix this will also be cut off.
     *
     * @return string The trailing name component of the given path.
     *
     * @see https://www.php.net/manual/en/function.basename.php
     */
    public static function baseName(string $path, string $suffix = ''): string
    {
        // The suffix is stripped from the raw path first, before separators are normalized.
        $length = mb_strlen($suffix);
        if ($length > 0 && mb_substr($path, -$length) === $suffix) {
            $path = mb_substr($path, 0, -$length);
        }

        // Normalize separators to "/" and drop trailing ones so the last component is found.
        $path = rtrim(str_replace('\\', '/', $path), '/\\');
        $position = mb_strrpos($path, '/');
        if ($position !== false) {
            return mb_substr($path, $position + 1);
        }

        return $path;
    }

    /**
     * Returns parent directory's path.
     * This method is similar to `dirname()` except that it will treat
     * both \ and / as directory separators, independent of the operating system.
     *
     * @param string $path A path string.
     *
     * @return string The parent directory's path, or an empty string if the path contains no separator.
     *
     * @see https://www.php.net/manual/en/function.dirname.php
     */
    public static function directoryName(string $path): string
    {
        // Replacing "\" with "/" is a 1:1 character swap, so the position is valid for the original path.
        $position = mb_strrpos(str_replace('\\', '/', $path), '/');
        if ($position !== false) {
            return mb_substr($path, 0, $position);
        }

        return '';
    }

    /**
     * Get part of string.
     *
     * @param string $string To get substring from.
     * @param int $start Character to start at.
     * @param int|null $length Number of characters to get. `null` means "until the end of the string".
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The extracted substring.
     *
     * @see https://php.net/manual/en/function.mb-substr.php
     */
    public static function substring(
        string $string,
        int $start,
        int|null $length = null,
        string $encoding = 'UTF-8'
    ): string {
        return mb_substr($string, $start, $length, $encoding);
    }

    /**
     * Replace text within a portion of a string.
     *
     * @param string $string The input string.
     * @param string $replacement The replacement string.
     * @param int $start Position to begin replacing substring at.
     * If start is non-negative, the replacing will begin at the start'th offset into string.
     * If start is negative, the replacing will begin at the start'th character from the end of string.
     * @param int|null $length Length of the substring to be replaced.
     * If given and is positive, it represents the length of the portion of string which is to be replaced.
     * If it is negative, it represents the number of characters from the end of string at which to stop replacing.
     * If it is not given, then it will default to the length of the string; i.e. end the replacing at the end of string.
     * If length is zero then this function will have the effect of inserting replacement into string at the given start offset.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The string with the given portion replaced.
     */
    public static function replaceSubstring(
        string $string,
        string $replacement,
        int $start,
        int|null $length = null,
        string $encoding = 'UTF-8',
    ): string {
        $stringLength = mb_strlen($string, $encoding);

        // Clamp the start offset into [0, $stringLength].
        if ($start < 0) {
            $start = max(0, $stringLength + $start);
        } elseif ($start > $stringLength) {
            $start = $stringLength;
        }

        // Convert a negative ("stop N characters before the end") or missing length into an absolute one.
        if ($length !== null && $length < 0) {
            $length = max(0, $stringLength - $start + $length);
        } elseif ($length === null || $length > $stringLength) {
            $length = $stringLength;
        }

        // Never replace past the end of the string.
        if (($start + $length) > $stringLength) {
            $length = $stringLength - $start;
        }

        return mb_substr($string, 0, $start, $encoding)
            . $replacement
            . mb_substr($string, $start + $length, $stringLength - $start - $length, $encoding);
    }

    /**
     * Check if given string starts with specified substring.
     * Binary and multibyte safe.
     *
     * @param string $input Input string.
     * @param string|null $with Part to search inside the $string.
     *
     * @return bool Returns true if first input starts with second input, false otherwise.
     * Always true when `$with` is `null`.
     */
    public static function startsWith(string $input, string|null $with): bool
    {
        return $with === null || str_starts_with($input, $with);
    }

    /**
     * Check if given string starts with specified substring ignoring case.
     * Strings are treated as UTF-8.
     *
     * @param string $input Input string.
     * @param string|null $with Part to search inside the $string.
     *
     * @return bool Returns true if first input starts with second input, false otherwise.
     * Always true when `$with` is `null` or empty.
     */
    public static function startsWithIgnoringCase(string $input, string|null $with): bool
    {
        if ($with === null || $with === '') {
            return true;
        }

        // Slice by character count, not byte count: upper- and lowercase forms of the same
        // character may differ in byte length, so a byte slice can reject a valid match
        // or cut a multibyte character in half.
        $prefix = mb_substr($input, 0, mb_strlen($with, 'UTF-8'), 'UTF-8');

        return self::lowercase($prefix) === self::lowercase($with);
    }

    /**
     * Check if given string ends with specified substring.
     * Binary and multibyte safe.
     *
     * @param string $input Input string to check.
     * @param string|null $with Part to search inside of the $string.
     *
     * @return bool Returns true if first input ends with second input, false otherwise.
     * Always true when `$with` is `null`.
     */
    public static function endsWith(string $input, string|null $with): bool
    {
        return $with === null || str_ends_with($input, $with);
    }

    /**
     * Check if given string ends with specified substring ignoring case.
     * Strings are treated as UTF-8.
     *
     * @param string $input Input string to check.
     * @param string|null $with Part to search inside of the $string.
     *
     * @return bool Returns true if first input ends with second input, false otherwise.
     * Always true when `$with` is `null` or empty.
     */
    public static function endsWithIgnoringCase(string $input, string|null $with): bool
    {
        if ($with === null || $with === '') {
            return true;
        }

        // Slice by character count, not byte count (see startsWithIgnoringCase() for the rationale).
        $suffix = mb_substr($input, -mb_strlen($with, 'UTF-8'), null, 'UTF-8');

        return self::lowercase($suffix) === self::lowercase($with);
    }

    /**
     * Truncates a string from the beginning to the number of characters specified.
     *
     * @param string $input String to process.
     * @param int $length Maximum length of the truncated string including trim marker.
     * @param string $trimMarker String to append to the beginning.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The truncated string, or the input unchanged if it is not longer than `$length`.
     */
    public static function truncateBegin(
        string $input,
        int $length,
        string $trimMarker = '…',
        string $encoding = 'UTF-8'
    ): string {
        $inputLength = mb_strlen($input, $encoding);

        if ($inputLength <= $length) {
            return $input;
        }

        // Replace the head of the string with the marker, keeping the last ($length - marker length) characters.
        $trimMarkerLength = mb_strlen($trimMarker, $encoding);
        return self::replaceSubstring($input, $trimMarker, 0, -$length + $trimMarkerLength, $encoding);
    }

    /**
     * Truncates a string in the middle. Keeping start and end.
     * `StringHelper::truncateMiddle('Hello world number 2', 8)` produces "Hell…r 2".
     *
     * @param string $input The string to truncate.
     * @param int $length Maximum length of the truncated string including trim marker.
     * @param string $trimMarker String to append in the middle of truncated string.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The truncated string.
     */
    public static function truncateMiddle(
        string $input,
        int $length,
        string $trimMarker = '…',
        string $encoding = 'UTF-8'
    ): string {
        $inputLength = mb_strlen($input, $encoding);

        if ($inputLength <= $length) {
            return $input;
        }

        // Split the available room around the marker; the head gets the extra character when it is odd.
        $trimMarkerLength = mb_strlen($trimMarker, $encoding);
        $start = (int)ceil(($length - $trimMarkerLength) / 2);
        $end = $length - $start - $trimMarkerLength;

        return self::replaceSubstring($input, $trimMarker, $start, -$end, $encoding);
    }

    /**
     * Truncates a string from the end to the number of characters specified.
     *
     * @param string $input The string to truncate.
     * @param int $length Maximum length of the truncated string including trim marker.
     * @param string $trimMarker String to append to the end of truncated string.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The truncated string.
     */
    public static function truncateEnd(
        string $input,
        int $length,
        string $trimMarker = '…',
        string $encoding = 'UTF-8'
    ): string {
        $inputLength = mb_strlen($input, $encoding);

        if ($inputLength <= $length) {
            return $input;
        }

        // Whitespace left dangling before the marker is removed with PHP's native rtrim().
        $trimMarkerLength = mb_strlen($trimMarker, $encoding);
        return rtrim(mb_substr($input, 0, $length - $trimMarkerLength, $encoding)) . $trimMarker;
    }

    /**
     * Truncates a string to the number of words specified.
     *
     * @param string $input The string to truncate.
     * @param int $count How many words from original string to include into truncated string.
     * Zero or a negative value keeps no words.
     * @param string $trimMarker String to append to the end of truncated string.
     *
     * @return string The truncated string, or the input unchanged if it has no more than `$count` words.
     */
    public static function truncateWords(string $input, int $count, string $trimMarker = '…'): string
    {
        // The resulting list alternates word, whitespace, word, ... so N words occupy 2N - 1 items.
        /** @psalm-var list<string> $words */
        $words = preg_split('/(\s+)/u', trim($input), -1, PREG_SPLIT_DELIM_CAPTURE);
        if (count($words) / 2 > $count) {
            // Clamp to zero: a negative length would make array_slice() drop only the tail
            // instead of returning nothing.
            $words = array_slice($words, 0, max(0, ($count * 2) - 1));
            return implode('', $words) . $trimMarker;
        }

        return $input;
    }

    /**
     * Get string length.
     *
     * @param string $string String to calculate length for.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return int The number of characters in the string.
     *
     * @see https://php.net/manual/en/function.mb-strlen.php
     */
    public static function length(string $string, string $encoding = 'UTF-8'): int
    {
        return mb_strlen($string, $encoding);
    }

    /**
     * Counts words in a string.
     *
     * Words are the non-empty chunks obtained by splitting the input on runs of whitespace.
     *
     * @param string $input The string to count words in.
     *
     * @return int The number of words.
     */
    public static function countWords(string $input): int
    {
        /** @var array $words */
        $words = preg_split('/\s+/u', $input, -1, PREG_SPLIT_NO_EMPTY);
        return count($words);
    }

    /**
     * Make a string lowercase.
     *
     * @param string $string String to process.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The lowercased string.
     *
     * @see https://php.net/manual/en/function.mb-strtolower.php
     */
    public static function lowercase(string $string, string $encoding = 'UTF-8'): string
    {
        return mb_strtolower($string, $encoding);
    }

    /**
     * Make a string uppercase.
     *
     * @param string $string String to process.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The uppercased string.
     *
     * @see https://php.net/manual/en/function.mb-strtoupper.php
     */
    public static function uppercase(string $string, string $encoding = 'UTF-8'): string
    {
        return mb_strtoupper($string, $encoding);
    }

    /**
     * Make a string's first character uppercase.
     *
     * @param string $string The string to be processed.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The string with its first character uppercased.
     *
     * @see https://php.net/manual/en/function.ucfirst.php
     */
    public static function uppercaseFirstCharacter(string $string, string $encoding = 'UTF-8'): string
    {
        $firstCharacter = self::substring($string, 0, 1, $encoding);
        $rest = self::substring($string, 1, null, $encoding);

        return self::uppercase($firstCharacter, $encoding) . $rest;
    }

    /**
     * Uppercase the first character of each word in a string.
     *
     * Note: the string is split on whitespace characters and re-joined with single spaces,
     * so runs of whitespace (including tabs and line breaks) are collapsed into one space
     * and leading/trailing whitespace is dropped.
     *
     * @param string $string The string to be processed.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The processed string.
     *
     * @see https://php.net/manual/en/function.ucwords.php
     */
    public static function uppercaseFirstCharacterInEachWord(string $string, string $encoding = 'UTF-8'): string
    {
        $words = preg_split('/\s/u', $string, -1, PREG_SPLIT_NO_EMPTY);

        $wordsWithUppercaseFirstCharacter = array_map(
            static fn (string $word) => self::uppercaseFirstCharacter($word, $encoding),
            $words
        );

        return implode(' ', $wordsWithUppercaseFirstCharacter);
    }

    /**
     * Encodes string into "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
     *
     * > Note: Base 64 padding `=` may be at the end of the returned string.
     * > `=` is not transparent to URL encoding.
     *
     * @see https://tools.ietf.org/html/rfc4648#page-7
     *
     * @param string $input The string to encode.
     *
     * @return string Encoded string.
     */
    public static function base64UrlEncode(string $input): string
    {
        return strtr(base64_encode($input), '+/', '-_');
    }

    /**
     * Decodes "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
     *
     * @see https://tools.ietf.org/html/rfc4648#page-7
     *
     * @param string $input Encoded string.
     *
     * @return string Decoded string.
     */
    public static function base64UrlDecode(string $input): string
    {
        return base64_decode(strtr($input, '-_', '+/'));
    }

    /**
     * Split a string to array with non-empty lines.
     * Whitespace from the beginning and end of a each line will be stripped.
     *
     * @param string $string The input string.
     * @param string $separator The boundary string. It is a part of regular expression
     * so should be taken into account or properly escaped with {@see preg_quote()}.
     *
     * @return array The non-empty, trimmed parts of the input.
     */
    public static function split(string $string, string $separator = '\R'): array
    {
        // Trim the whole input first so no part starts or ends with whitespace.
        $string = preg_replace('(^\s*|\s*$)', '', $string);
        return preg_split('~\s*' . $separator . '\s*~u', $string, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Parses a delimited path string into a list of its keys.
     *
     * @param string $path The path of where do you want to write a value to `$array`. The path can be described by
     * a string when each key should be separated by delimiter. If a path item contains delimiter, it can be escaped
     * with "\" (backslash) or a custom delimiter can be used.
     * @param string $delimiter A separator, used to parse string key for embedded object property retrieving. Defaults
     * to "." (dot).
     * @param string $escapeCharacter An escape character, used to escape delimiter. Defaults to "\" (backslash).
     * @param bool $preserveDelimiterEscaping Whether to preserve delimiter escaping in the items of final array (in
     * case of using string as an input). When `false`, "\" (backslashes) are removed. For a "." as delimiter, "."
     * becomes "\.". Defaults to `false`.
     *
     * @throws InvalidArgumentException If the delimiter or the escape character is not exactly one byte long,
     * or if they are equal.
     *
     * @return string[]
     *
     * @psalm-return list<string>
     */
    public static function parsePath(
        string $path,
        string $delimiter = '.',
        string $escapeCharacter = '\\',
        bool $preserveDelimiterEscaping = false
    ): array {
        if (strlen($delimiter) !== 1) {
            throw new InvalidArgumentException('Only 1 character is allowed for delimiter.');
        }

        if (strlen($escapeCharacter) !== 1) {
            throw new InvalidArgumentException('Only 1 escape character is allowed.');
        }

        if ($delimiter === $escapeCharacter) {
            throw new InvalidArgumentException('Delimiter and escape character must be different.');
        }

        if ($path === '') {
            return [];
        }

        // Fast path: nothing to split, only escaped escape characters may need unescaping.
        if (!str_contains($path, $delimiter)) {
            if ($preserveDelimiterEscaping) {
                return [$path];
            }

            return [str_replace($escapeCharacter . $escapeCharacter, $escapeCharacter, $path)];
        }

        // Split on delimiters preceded by an even number (including zero) of escape characters.
        // The escape characters directly before such a delimiter are consumed by the match,
        // so offsets are captured in order to restore them below.
        /** @psalm-var non-empty-list<array{0:string, 1:int}> $matches */
        $matches = preg_split(
            sprintf(
                '/(?<!%1$s)((?>%1$s%1$s)*)%2$s/',
                preg_quote($escapeCharacter, '/'),
                preg_quote($delimiter, '/')
            ),
            $path,
            -1,
            PREG_SPLIT_OFFSET_CAPTURE
        );
        $result = [];
        $countResults = count($matches);
        for ($i = 1; $i < $countResults; $i++) {
            // Number of escape characters swallowed by the separator match before the delimiter.
            $l = $matches[$i][1] - $matches[$i - 1][1] - strlen($matches[$i - 1][0]) - 1;
            $result[] = $matches[$i - 1][0] . ($l > 0 ? str_repeat($escapeCharacter, $l) : '');
        }
        $result[] = $matches[$countResults - 1][0];

        if ($preserveDelimiterEscaping === true) {
            return $result;
        }

        return array_map(
            static fn (string $key): string => str_replace(
                [
                    $escapeCharacter . $escapeCharacter,
                    $escapeCharacter . $delimiter,
                ],
                [
                    $escapeCharacter,
                    $delimiter,
                ],
                $key
            ),
            $result
        );
    }

    /**
     * Strip Unicode whitespace (with Unicode symbol property White_Space=yes) or other characters from the beginning and end of a string.
     * Input string and pattern are treated as UTF-8.
     *
     * @see https://en.wikipedia.org/wiki/Whitespace_character#Unicode
     * @see https://www.php.net/manual/function.preg-replace
     *
     * @param string|string[] $string The string or an array with strings.
     * @param string $pattern PCRE regex pattern to search for, as UTF-8 string. Use {@see preg_quote()} to quote `$pattern` if it contains
     * special regular expression characters. The pattern is placed inside a character class (`[...]`).
     *
     * @psalm-template TKey of array-key
     * @psalm-param string|array<TKey, string> $string
     * @psalm-param non-empty-string $pattern
     * @psalm-return ($string is array ? array<TKey, string> : string)
     *
     * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
     *
     * @return string|string[]
     */
    public static function trim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
    {
        self::ensureUtf8Pattern($pattern);

        return preg_replace("#^[$pattern]+|[$pattern]+$#uD", '', $string);
    }

    /**
     * Strip Unicode whitespace (with Unicode symbol property White_Space=yes) or other characters from the beginning of a string.
     *
     * @see self::trim()
     *
     * @param string|string[] $string The string or an array with strings.
     * @param string $pattern PCRE regex pattern to search for, as UTF-8 string. Use {@see preg_quote()} to quote `$pattern` if it contains
     * special regular expression characters. The pattern is placed inside a character class (`[...]`).
     *
     * @psalm-template TKey of array-key
     * @psalm-param string|array<TKey, string> $string
     * @psalm-param non-empty-string $pattern
     * @psalm-return ($string is array ? array<TKey, string> : string)
     *
     * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
     *
     * @return string|string[]
     */
    public static function ltrim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
    {
        self::ensureUtf8Pattern($pattern);

        return preg_replace("#^[$pattern]+#u", '', $string);
    }

    /**
     * Strip Unicode whitespace (with Unicode symbol property White_Space=yes) or other characters from the end of a string.
     *
     * @see self::trim()
     *
     * @param string|string[] $string The string or an array with strings.
     * @param string $pattern PCRE regex pattern to search for, as UTF-8 string. Use {@see preg_quote()} to quote `$pattern` if it contains
     * special regular expression characters. The pattern is placed inside a character class (`[...]`).
     *
     * @psalm-template TKey of array-key
     * @psalm-param string|array<TKey, string> $string
     * @psalm-param non-empty-string $pattern
     * @psalm-return ($string is array ? array<TKey, string> : string)
     *
     * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
     *
     * @return string|string[]
     */
    public static function rtrim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
    {
        self::ensureUtf8Pattern($pattern);

        return preg_replace("#[$pattern]+$#uD", '', $string);
    }

    /**
     * Returns the portion of the string that lies between the first occurrence of the `$start` string
     * and the last occurrence of the `$end` string after that.
     *
     * @param string $string The input string.
     * @param string $start The string marking the start of the portion to extract.
     * @param string|null $end The string marking the end of the portion to extract.
     * If the `$end` string is not provided, it defaults to the value of the `$start` string.
     * @return string|null The portion of the string between the first occurrence of
     * `$start` and the last occurrence of `$end`, or null if either `$start` or `$end` cannot be found.
     */
    public static function findBetween(string $string, string $start, ?string $end = null): ?string
    {
        if ($end === null) {
            $end = $start;
        }

        $startPos = mb_strpos($string, $start);

        if ($startPos === false) {
            return null;
        }

        // Move past the start marker; the end marker is only searched for after it.
        $startPos += mb_strlen($start);
        $endPos = mb_strrpos($string, $end, $startPos);

        if ($endPos === false) {
            return null;
        }

        return mb_substr($string, $startPos, $endPos - $startPos);
    }

    /**
     * Returns the portion of the string between the initial occurrence of the '$start' string
     * and the next occurrence of the '$end' string.
     *
     * @param string $string The input string.
     * @param string $start The string marking the beginning of the segment to extract.
     * @param string|null $end The string marking the termination of the segment.
     * If the '$end' string is not provided, it defaults to the value of the '$start' string.
     * @return string|null Extracted segment, or null if '$start' or '$end' is not present.
     */
    public static function findBetweenFirst(string $string, string $start, ?string $end = null): ?string
    {
        if ($end === null) {
            $end = $start;
        }

        $startPos = mb_strpos($string, $start);

        if ($startPos === false) {
            return null;
        }

        // Move past the start marker; the end marker is only searched for after it.
        $startPos += mb_strlen($start);
        $endPos = mb_strpos($string, $end, $startPos);

        if ($endPos === false) {
            return null;
        }

        return mb_substr($string, $startPos, $endPos - $startPos);
    }

    /**
     * Returns the portion of the string between the latest '$start' string
     * and the subsequent '$end' string.
     *
     * @param string $string The input string.
     * @param string $start The string marking the beginning of the segment to extract.
     * @param string|null $end The string marking the termination of the segment.
     * If the '$end' string is not provided, it defaults to the value of the '$start' string.
     * @return string|null Extracted segment, or null if '$start' or '$end' is not present.
     */
    public static function findBetweenLast(string $string, string $start, ?string $end = null): ?string
    {
        if ($end === null) {
            $end = $start;
        }

        $endPos = mb_strrpos($string, $end);

        if ($endPos === false) {
            return null;
        }

        // Look for the last start marker located entirely before the last end marker.
        $startPos = mb_strrpos(mb_substr($string, 0, $endPos), $start);

        if ($startPos === false) {
            return null;
        }

        $startPos += mb_strlen($start);

        return mb_substr($string, $startPos, $endPos - $startPos);
    }

    /**
     * Ensure the input string is a valid UTF-8 string.
     *
     * @param string $pattern The input string.
     *
     * @throws InvalidArgumentException If the string is not valid UTF-8.
     */
    private static function ensureUtf8Pattern(string $pattern): void
    {
        // An empty pattern with the "u" modifier matches any valid UTF-8 subject and fails on an invalid one.
        if (!preg_match('##u', $pattern)) {
            throw new InvalidArgumentException('Pattern is not a valid UTF-8 string.');
        }
    }
}
742
|
|
|
|