1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Yiisoft\Strings; |
6
|
|
|
|
7
|
|
|
use InvalidArgumentException; |
8
|
|
|
|
9
|
|
|
use function array_slice; |
10
|
|
|
use function count; |
11
|
|
|
use function function_exists; |
12
|
|
|
use function max; |
13
|
|
|
use function mb_strlen; |
14
|
|
|
use function mb_strtolower; |
15
|
|
|
use function mb_strtoupper; |
16
|
|
|
use function mb_substr; |
17
|
|
|
use function preg_match; |
18
|
|
|
use function preg_replace; |
19
|
|
|
use function str_ends_with; |
20
|
|
|
use function str_starts_with; |
21
|
|
|
use function strlen; |
22
|
|
|
|
23
|
|
|
/** |
24
|
|
|
* Provides static methods to work with strings. |
25
|
|
|
*/ |
26
|
|
|
final class StringHelper |
27
|
|
|
{ |
28
|
|
|
public const DEFAULT_WHITESPACE_PATTERN = "\pC\pZ"; |
29
|
|
|
|
30
|
|
|
/**
 * Counts the bytes that make up a string.
 *
 * The measurement is done with {@see mb_strlen()} in the `8bit` encoding, so every byte
 * counts as one unit regardless of the character encoding of the text.
 *
 * @param string|null $input The string to measure. `null` is measured as an empty string.
 *
 * @return int The size of the string in bytes.
 */
public static function byteLength(?string $input): int
{
    $bytes = $input ?? '';

    return mb_strlen($bytes, '8bit');
}
43
|
|
|
|
44
|
|
|
/**
 * Returns the portion of a string specified by byte offset and byte length.
 *
 * The string is treated as a byte array: {@see mb_substr()} is called with the `8bit`
 * encoding, so `$start` and `$length` are counted in bytes, not characters.
 *
 * @param string $input The input string.
 * @param int $start The starting byte position. Negative values count from the end of the string.
 * @param int|null $length The desired portion length in bytes. If not specified or `null`, the
 * byte length of the whole input is used, i.e. the output runs until the end of the string.
 *
 * @return string The extracted part of the string. May be an empty string.
 *
 * @see https://www.php.net/manual/en/function.mb-substr.php
 */
public static function byteSubstring(string $input, int $start, ?int $length = null): string
{
    // The nullable type is spelled out explicitly: relying on `int $length = null`
    // (implicit nullability) is deprecated as of PHP 8.4.
    return mb_substr($input, $start, $length ?? mb_strlen($input, '8bit'), '8bit');
}
61
|
|
|
|
62
|
|
|
/**
 * Extracts the last name component of a path.
 *
 * Works like PHP's `basename()` but accepts both `\` and `/` as separators on every
 * operating system, which makes it suitable for PHP namespaces and class names.
 * The path is handled purely as text: the filesystem is never consulted and
 * components such as ".." get no special treatment.
 *
 * Processing order: the suffix (if the path ends with it) is cut off first, then
 * backslashes are converted to slashes and trailing separators are removed, and
 * finally everything after the last slash is returned.
 *
 * @param string $path A path string.
 * @param string $suffix Text to strip from the end of `$path` when present. An empty suffix strips nothing.
 *
 * @return string The trailing name component of the given path.
 *
 * @see https://www.php.net/manual/en/function.basename.php
 */
public static function baseName(string $path, string $suffix = ''): string
{
    $suffixLength = mb_strlen($suffix);
    $endsWithSuffix = $suffixLength > 0 && mb_substr($path, -$suffixLength) === $suffix;

    if ($endsWithSuffix) {
        $path = mb_substr($path, 0, -$suffixLength);
    }

    $normalized = rtrim(str_replace('\\', '/', $path), '/\\');
    $lastSlash = mb_strrpos($normalized, '/');

    return $lastSlash === false
        ? $normalized
        : mb_substr($normalized, $lastSlash + 1);
}
91
|
|
|
|
92
|
|
|
/**
 * Returns the part of a path that precedes its last directory separator.
 *
 * Similar to PHP's `dirname()`, except that both `\` and `/` are recognised as
 * separators on every operating system. The returned text is cut from the original
 * path, so the separators it contains are left exactly as they were given.
 *
 * @param string $path A path string.
 *
 * @return string Everything before the last separator, or an empty string when the
 * path contains no separator at all.
 *
 * @see https://www.php.net/manual/en/function.dirname.php
 */
public static function directoryName(string $path): string
{
    // Separators are unified only to locate the last one; the cut is made on the original path.
    $unified = str_replace('\\', '/', $path);
    $lastSeparator = mb_strrpos($unified, '/');

    return $lastSeparator === false
        ? ''
        : mb_substr($path, 0, $lastSeparator);
}
112
|
|
|
|
113
|
|
|
/**
 * Get part of a string, counted in characters of the given encoding.
 *
 * Thin wrapper around {@see mb_substr()}.
 *
 * @param string $string To get substring from.
 * @param int $start Character to start at. Negative values count from the end of the string.
 * @param int|null $length Number of characters to get. `null` means "until the end of the string".
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @see https://php.net/manual/en/function.mb-substr.php
 *
 * @return string The extracted part of the string.
 */
public static function substring(string $string, int $start, ?int $length = null, string $encoding = 'UTF-8'): string
{
    // `?int` replaces the implicit nullability of `int $length = null`, deprecated as of PHP 8.4.
    return mb_substr($string, $start, $length, $encoding);
}
129
|
|
|
|
130
|
|
|
/**
 * Replaces a character range of a string with another string.
 *
 * @param string $string The input string.
 * @param string $replacement The text put in place of the selected range.
 * @param int $start Where the replaced range begins.
 * A non-negative value is an offset from the beginning of the string (values past the end are
 * clamped to the end). A negative value is counted from the end of the string (values before
 * the beginning are clamped to the beginning).
 * @param int|null $length Size of the replaced range.
 * A positive value is the number of characters to replace.
 * A negative value is the number of characters at the end of the string that must stay untouched.
 * `null` (or a value larger than the string) replaces everything up to the end of the string.
 * Zero inserts the replacement at `$start` without removing anything.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The string with the selected range replaced.
 */
public static function replaceSubstring(string $string, string $replacement, int $start, ?int $length = null, string $encoding = 'UTF-8'): string
{
    $total = mb_strlen($string, $encoding);

    // Translate $start into an absolute offset within [0, $total].
    if ($start < 0) {
        $offset = max(0, $total + $start);
    } else {
        $offset = $start > $total ? $total : $start;
    }

    // Translate $length into a non-negative number of characters to drop.
    if ($length === null || $length > $total) {
        $span = $total;
    } elseif ($length < 0) {
        $span = max(0, $total - $offset + $length);
    } else {
        $span = $length;
    }

    // The dropped range must not run past the end of the string.
    if ($offset + $span > $total) {
        $span = $total - $offset;
    }

    $head = mb_substr($string, 0, $offset, $encoding);
    $tail = mb_substr($string, $offset + $span, $total - $offset - $span, $encoding);

    return $head . $replacement . $tail;
}
169
|
|
|
|
170
|
|
|
/**
 * Check if given string starts with specified substring.
 *
 * The comparison is case-sensitive and is delegated to {@see str_starts_with()}.
 *
 * @param string $input Input string.
 * @param string|null $with Part to look for at the beginning of `$input`.
 * `null` and an empty string always match.
 *
 * @return bool Returns true if first input starts with second input, false otherwise.
 */
public static function startsWith(string $input, ?string $with): bool
{
    // This file already requires PHP 8 (it uses union types), where str_starts_with()
    // always exists, so no runtime availability check or manual fallback is needed.
    return $with === null || str_starts_with($input, $with);
}
196
|
|
|
|
197
|
|
|
/**
 * Check if given string starts with specified substring ignoring case.
 *
 * Both strings are lowercased as UTF-8 text and then compared. The whole input is
 * lowercased before the comparison, rather than a byte-sized prefix of it, because
 * changing case can change the byte length of a character.
 *
 * @param string $input Input string.
 * @param string|null $with Part to look for at the beginning of `$input`.
 * `null` and an empty string always match.
 *
 * @return bool Returns true if first input starts with second input, false otherwise.
 */
public static function startsWithIgnoringCase(string $input, ?string $with): bool
{
    if ($with === null || $with === '') {
        return true;
    }

    return str_starts_with(self::lowercase($input), self::lowercase($with));
}
218
|
|
|
|
219
|
|
|
/**
 * Check if given string ends with specified substring.
 *
 * The comparison is case-sensitive and is delegated to {@see str_ends_with()}.
 *
 * @param string $input Input string to check.
 * @param string|null $with Part to look for at the end of `$input`.
 * `null` and an empty string always match.
 *
 * @return bool Returns true if first input ends with second input, false otherwise.
 */
public static function endsWith(string $input, ?string $with): bool
{
    // This file already requires PHP 8 (it uses union types), where str_ends_with()
    // always exists, so no runtime availability check or manual fallback is needed.
    return $with === null || str_ends_with($input, $with);
}
250
|
|
|
|
251
|
|
|
/**
 * Check if given string ends with specified substring ignoring case.
 *
 * Both strings are lowercased as UTF-8 text and then compared. The whole input is
 * lowercased before the comparison, rather than a byte-sized tail of it, because
 * changing case can change the byte length of a character and a byte-based cut can
 * land in the middle of a multibyte character.
 *
 * @param string $input Input string to check.
 * @param string|null $with Part to look for at the end of `$input`.
 * `null` and an empty string always match.
 *
 * @return bool Returns true if first input ends with second input, false otherwise.
 */
public static function endsWithIgnoringCase(string $input, ?string $with): bool
{
    if ($with === null || $with === '') {
        return true;
    }

    return str_ends_with(self::lowercase($input), self::lowercase($with));
}
272
|
|
|
|
273
|
|
|
/**
 * Truncates a string from the beginning to the number of characters specified.
 *
 * The end of the input is kept and the trim marker is put in front of it.
 *
 * @param string $input String to process.
 * @param int $length Maximum length of the truncated string including trim marker.
 * When it leaves no room for input characters next to the marker, only the marker is returned.
 * @param string $trimMarker String to prepend to the truncated string.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The input itself when it already fits into `$length`, the truncated string otherwise.
 */
public static function truncateBegin(string $input, int $length, string $trimMarker = '…', string $encoding = 'UTF-8'): string
{
    $inputLength = mb_strlen($input, $encoding);

    if ($inputLength <= $length) {
        return $input;
    }

    // Number of input characters that fit next to the marker. Clamped at zero so that a
    // $length not larger than the marker yields just the marker instead of a longer string.
    $keep = max(0, $length - mb_strlen($trimMarker, $encoding));

    // $keep < $inputLength here, so the start offset is always positive.
    return $trimMarker . mb_substr($input, $inputLength - $keep, null, $encoding);
}
294
|
|
|
|
295
|
|
|
/**
 * Truncates a string in the middle. Keeping start and end.
 * `StringHelper::truncateMiddle('Hello world number 2', 8)` produces "Hell…r 2".
 *
 * The characters left after reserving room for the marker are split between the start
 * and the end of the input; when the split is uneven the start gets the extra character.
 *
 * @param string $input The string to truncate.
 * @param int $length Maximum length of the truncated string including trim marker.
 * When it leaves no room for input characters next to the marker, only the marker is returned.
 * @param string $trimMarker String to put in the middle of truncated string.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The input itself when it already fits into `$length`, the truncated string otherwise.
 */
public static function truncateMiddle(string $input, int $length, string $trimMarker = '…', string $encoding = 'UTF-8'): string
{
    $inputLength = mb_strlen($input, $encoding);

    if ($inputLength <= $length) {
        return $input;
    }

    // Input characters that fit around the marker; never negative.
    $available = max(0, $length - mb_strlen($trimMarker, $encoding));
    $headLength = (int) ceil($available / 2);
    $tailLength = $available - $headLength;

    $head = mb_substr($input, 0, $headLength, $encoding);
    // A zero tail must be handled explicitly: an offset of -0 would select the whole string.
    $tail = $tailLength > 0 ? mb_substr($input, -$tailLength, null, $encoding) : '';

    return $head . $trimMarker . $tail;
}
320
|
|
|
|
321
|
|
|
/**
 * Truncates a string from the end to the number of characters specified.
 *
 * The beginning of the input is kept, trailing whitespace of the kept part is removed
 * and the trim marker is appended.
 *
 * @param string $input The string to truncate.
 * @param int $length Maximum length of the truncated string including trim marker.
 * When it leaves no room for input characters next to the marker, only the marker is returned.
 * @param string $trimMarker String to append to the end of truncated string.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The input itself when it already fits into `$length`, the truncated string otherwise.
 */
public static function truncateEnd(string $input, int $length, string $trimMarker = '…', string $encoding = 'UTF-8'): string
{
    $inputLength = mb_strlen($input, $encoding);

    if ($inputLength <= $length) {
        return $input;
    }

    // Clamp at zero: a negative length would make mb_substr() keep everything
    // except the last characters instead of keeping nothing.
    $keep = max(0, $length - mb_strlen($trimMarker, $encoding));

    return rtrim(mb_substr($input, 0, $keep, $encoding)) . $trimMarker;
}
342
|
|
|
|
343
|
|
|
/**
 * Truncates a string to the number of words specified.
 *
 * Words are runs of characters separated by whitespace. The whitespace between the kept
 * words is preserved as it was in the input. When truncation happens, surrounding
 * whitespace of the input is dropped and the trim marker is appended.
 *
 * @param string $input The string to truncate.
 * @param int $count How many words from original string to include into truncated string.
 * Zero or a negative value keeps no words.
 * @param string $trimMarker String to append to the end of truncated string.
 *
 * @return string The input itself when it has no more than `$count` words (or contains
 * no words at all), the truncated string otherwise.
 */
public static function truncateWords(string $input, int $count, string $trimMarker = '…'): string
{
    $trimmed = trim($input);

    if ($trimmed === '') {
        // Nothing to truncate.
        return $input;
    }

    // With PREG_SPLIT_DELIM_CAPTURE the list alternates: word, separator, word, ...
    // so N words occupy 2N - 1 entries.
    /** @var string[] $tokens */
    $tokens = preg_split('/(\s+)/u', $trimmed, -1, PREG_SPLIT_DELIM_CAPTURE);

    // A negative limit is treated as zero; otherwise array_slice() would receive a
    // negative length and keep everything except the last entries.
    $wordLimit = max(0, $count);

    if (count($tokens) / 2 > $wordLimit) {
        $kept = $wordLimit === 0 ? [] : array_slice($tokens, 0, ($wordLimit * 2) - 1);

        return implode('', $kept) . $trimMarker;
    }

    return $input;
}
363
|
|
|
|
364
|
|
|
/**
 * Returns the number of characters in a string.
 *
 * Thin wrapper around {@see mb_strlen()}.
 *
 * @param string $string String to calculate length for.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @see https://php.net/manual/en/function.mb-strlen.php
 *
 * @return int Character count of `$string` in the given encoding.
 */
public static function length(string $string, string $encoding = 'UTF-8'): int
{
    $characters = mb_strlen($string, $encoding);

    return $characters;
}
378
|
|
|
|
379
|
|
|
/**
 * Counts words in a string.
 *
 * The input is split on runs of whitespace and the non-empty pieces are counted.
 *
 * @param string $input The text to count words in.
 *
 * @return int The number of whitespace-separated words.
 */
public static function countWords(string $input): int
{
    $words = preg_split('/\s+/u', $input, -1, PREG_SPLIT_NO_EMPTY);

    return count($words);
}
390
|
|
|
|
391
|
|
|
/**
 * Converts a string to lowercase.
 *
 * Thin wrapper around {@see mb_strtolower()}.
 *
 * @param string $string String to process.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @see https://php.net/manual/en/function.mb-strtolower.php
 *
 * @return string The lowercased string.
 */
public static function lowercase(string $string, string $encoding = 'UTF-8'): string
{
    $lowered = mb_strtolower($string, $encoding);

    return $lowered;
}
405
|
|
|
|
406
|
|
|
/**
 * Converts a string to uppercase.
 *
 * Thin wrapper around {@see mb_strtoupper()}.
 *
 * @param string $string String to process.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @see https://php.net/manual/en/function.mb-strtoupper.php
 *
 * @return string The uppercased string.
 */
public static function uppercase(string $string, string $encoding = 'UTF-8'): string
{
    $raised = mb_strtoupper($string, $encoding);

    return $raised;
}
420
|
|
|
|
421
|
|
|
/**
 * Uppercases the first character of a string and leaves the rest untouched.
 *
 * Multibyte counterpart of PHP's `ucfirst()`.
 *
 * @param string $string The string to be processed.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @return string The string with its first character uppercased.
 *
 * @see https://php.net/manual/en/function.ucfirst.php
 */
public static function uppercaseFirstCharacter(string $string, string $encoding = 'UTF-8'): string
{
    $initial = mb_substr($string, 0, 1, $encoding);
    $remainder = mb_substr($string, 1, null, $encoding);

    return mb_strtoupper($initial, $encoding) . $remainder;
}
438
|
|
|
|
439
|
|
|
/**
 * Uppercases the first character of every word in a string.
 *
 * The input is split on whitespace characters, empty pieces are dropped, each word gets
 * its first character uppercased, and the words are joined back with a single space.
 * As a consequence, leading/trailing whitespace is removed and any whitespace between
 * words is replaced by one space.
 *
 * @param string $string The string to be processed.
 * @param string $encoding The encoding to use, defaults to "UTF-8".
 *
 * @see https://php.net/manual/en/function.ucwords.php
 *
 * @return string The words of the input, capitalised and separated by single spaces.
 */
public static function uppercaseFirstCharacterInEachWord(string $string, string $encoding = 'UTF-8'): string
{
    $capitalized = [];

    foreach (preg_split('/\s/u', $string, -1, PREG_SPLIT_NO_EMPTY) as $word) {
        $initial = mb_substr($word, 0, 1, $encoding);
        $remainder = mb_substr($word, 1, null, $encoding);

        $capitalized[] = mb_strtoupper($initial, $encoding) . $remainder;
    }

    return implode(' ', $capitalized);
}
459
|
|
|
|
460
|
|
|
/**
 * Encodes string into "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
 *
 * The input is Base64-encoded and then `+` is replaced by `-` and `/` by `_`.
 *
 * > Note: Base 64 padding `=` is kept, so it may be at the end of the returned string.
 * > `=` is not transparent to URL encoding.
 *
 * @see https://tools.ietf.org/html/rfc4648#page-7
 *
 * @param string $input The string to encode.
 *
 * @return string Encoded string.
 */
public static function base64UrlEncode(string $input): string
{
    $standard = base64_encode($input);

    return str_replace(['+', '/'], ['-', '_'], $standard);
}
476
|
|
|
|
477
|
|
|
/**
 * Decodes "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
 *
 * `-` is turned back into `+` and `_` into `/` before the regular Base64 decoding is applied.
 *
 * @see https://tools.ietf.org/html/rfc4648#page-7
 *
 * @param string $input Encoded string.
 *
 * @return string Decoded string.
 */
public static function base64UrlDecode(string $input): string
{
    $standard = str_replace(['-', '_'], ['+', '/'], $input);

    return base64_decode($standard);
}
490
|
|
|
|
491
|
|
|
/**
 * Splits a string into an array of non-empty parts (lines by default).
 *
 * Whitespace at the beginning and end of the whole string, and around every separator,
 * is stripped. Empty parts are not included in the result.
 *
 * @param string $string The input string.
 * @param string $separator The boundary to split on. It is inserted into a regular expression
 * as is, so regex syntax can be used; literal text must be escaped with {@see preg_quote()}.
 * Defaults to `\R` (any line break).
 *
 * @return array The non-empty parts of the string.
 */
public static function split(string $string, string $separator = '\R'): array
{
    $stripped = preg_replace('(^\s*|\s*$)', '', $string);
    $boundary = '~\s*' . $separator . '\s*~u';

    return preg_split($boundary, $stripped, -1, PREG_SPLIT_NO_EMPTY);
}
506
|
|
|
|
507
|
|
|
/**
 * Splits a delimited path string into its keys, honouring escaped delimiters.
 *
 * A delimiter splits the path only when it is not escaped, i.e. when it is preceded by an
 * even number (including zero) of escape characters.
 *
 * @param string $path The path to parse. Keys are separated by the delimiter. If a key contains
 * the delimiter, it can be escaped with the escape character or a different delimiter can be used.
 * An empty path gives an empty array.
 * @param string $delimiter The key separator. Must be exactly one byte long. Defaults to "." (dot).
 * @param string $escapeCharacter The character used to escape the delimiter. Must be exactly one
 * byte long and differ from the delimiter. Defaults to "\" (backslash).
 * @param bool $preserveDelimiterEscaping Whether to keep the escaping in the returned keys.
 * When `false`, a doubled escape character becomes a single one and an escaped delimiter becomes
 * a plain delimiter (for a "." delimiter, "\." becomes "."). Defaults to `false`.
 *
 * @throws InvalidArgumentException If the delimiter or the escape character is not exactly one
 * byte long, or if they are equal.
 *
 * @return string[]
 *
 * @psalm-return list<string>
 */
public static function parsePath(
    string $path,
    string $delimiter = '.',
    string $escapeCharacter = '\\',
    bool $preserveDelimiterEscaping = false
): array {
    if (strlen($delimiter) !== 1) {
        throw new InvalidArgumentException('Only 1 character is allowed for delimiter.');
    }

    if (strlen($escapeCharacter) !== 1) {
        throw new InvalidArgumentException('Only 1 escape character is allowed.');
    }

    if ($delimiter === $escapeCharacter) {
        throw new InvalidArgumentException('Delimiter and escape character must be different.');
    }

    if ($path === '') {
        return [];
    }

    // Matches a delimiter together with the (possibly empty) run of escape-character pairs
    // directly in front of it, provided that run is not itself preceded by an escape character.
    $splitPattern = sprintf(
        '/(?<!%1$s)((?>%1$s%1$s)*)%2$s/',
        preg_quote($escapeCharacter, '/'),
        preg_quote($delimiter, '/')
    );

    /** @psalm-var non-empty-list<array{0:string, 1:int}> $chunks */
    $chunks = preg_split($splitPattern, $path, -1, PREG_SPLIT_OFFSET_CAPTURE);

    $keys = [];
    $lastIndex = count($chunks) - 1;

    foreach ($chunks as $index => [$text, $offset]) {
        if ($index === $lastIndex) {
            $keys[] = $text;
            break;
        }

        // The escape characters matched together with the delimiter were dropped by the split.
        // Their number is the gap between the end of this chunk and the start of the next one,
        // minus one byte for the delimiter itself; they are put back at the end of the key.
        $swallowed = $chunks[$index + 1][1] - $offset - strlen($text) - 1;
        $keys[] = $text . ($swallowed > 0 ? str_repeat($escapeCharacter, $swallowed) : '');
    }

    if ($preserveDelimiterEscaping) {
        return $keys;
    }

    $escaped = [$escapeCharacter . $escapeCharacter, $escapeCharacter . $delimiter];
    $plain = [$escapeCharacter, $delimiter];

    return array_map(
        static fn (string $key): string => str_replace($escaped, $plain, $key),
        $keys
    );
}
584
|
|
|
|
585
|
|
|
/**
 * Strips Unicode whitespace or other characters from the beginning and end of a string.
 * Input string and pattern are treated as UTF-8.
 *
 * The pattern is placed inside a regex character class (`[...]`); every leading and trailing
 * character belonging to that class is removed. The default pattern consists of the Unicode
 * property classes `\pC` and `\pZ`.
 *
 * @see https://en.wikipedia.org/wiki/Whitespace_character#Unicode
 * @see https://www.php.net/manual/function.preg-replace
 *
 * @param string|string[] $string The string or an array with strings.
 * @param string $pattern Content of a PCRE character class, as UTF-8 string. Use {@see preg_quote()}
 * to quote `$pattern` if it contains special regular expression characters.
 *
 * @psalm-template TKey of array-key
 * @psalm-param string|array<TKey, string> $string
 * @psalm-param non-empty-string $pattern
 * @psalm-return ($string is array ? array<TKey, string> : string)
 *
 * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
 *
 * @return string|string[] The trimmed string, or the array with every string trimmed.
 */
public static function trim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
{
    self::ensureUtf8Pattern($pattern);

    // The "D" modifier makes "$" match only at the very end of the subject.
    $regex = '#^[' . $pattern . ']+|[' . $pattern . ']+$#uD';

    return preg_replace($regex, '', $string);
}
609
|
|
|
|
610
|
|
|
/**
 * Strips Unicode whitespace or other characters from the beginning of a string.
 * Input string and pattern are treated as UTF-8.
 *
 * The pattern is placed inside a regex character class (`[...]`); every leading character
 * belonging to that class is removed. The default pattern consists of the Unicode property
 * classes `\pC` and `\pZ`.
 *
 * @see self::trim()
 *
 * @param string|string[] $string The string or an array with strings.
 * @param string $pattern Content of a PCRE character class, as UTF-8 string. Use {@see preg_quote()}
 * to quote `$pattern` if it contains special regular expression characters.
 *
 * @psalm-template TKey of array-key
 * @psalm-param string|array<TKey, string> $string
 * @psalm-param non-empty-string $pattern
 * @psalm-return ($string is array ? array<TKey, string> : string)
 *
 * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
 *
 * @return string|string[] The trimmed string, or the array with every string trimmed.
 */
public static function ltrim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
{
    self::ensureUtf8Pattern($pattern);

    $regex = '#^[' . $pattern . ']+#u';

    return preg_replace($regex, '', $string);
}
632
|
|
|
|
633
|
|
|
/**
 * Strips Unicode whitespace or other characters from the end of a string.
 * Input string and pattern are treated as UTF-8.
 *
 * The pattern is placed inside a regex character class (`[...]`); every trailing character
 * belonging to that class is removed. The default pattern consists of the Unicode property
 * classes `\pC` and `\pZ`.
 *
 * @see self::trim()
 *
 * @param string|string[] $string The string or an array with strings.
 * @param string $pattern Content of a PCRE character class, as UTF-8 string. Use {@see preg_quote()}
 * to quote `$pattern` if it contains special regular expression characters.
 *
 * @psalm-template TKey of array-key
 * @psalm-param string|array<TKey, string> $string
 * @psalm-param non-empty-string $pattern
 * @psalm-return ($string is array ? array<TKey, string> : string)
 *
 * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
 *
 * @return string|string[] The trimmed string, or the array with every string trimmed.
 */
public static function rtrim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
{
    self::ensureUtf8Pattern($pattern);

    // The "D" modifier makes "$" match only at the very end of the subject.
    $regex = '#[' . $pattern . ']+$#uD';

    return preg_replace($regex, '', $string);
}
655
|
|
|
|
656
|
|
|
/**
 * Ensures the given pattern is a valid UTF-8 string.
 *
 * The check runs an empty regular expression with the `u` modifier against the pattern;
 * a falsy result from {@see preg_match()} is treated as "not valid UTF-8".
 *
 * @param string $pattern The string to validate.
 *
 * @throws InvalidArgumentException If the check fails.
 */
private static function ensureUtf8Pattern(string $pattern): void
{
    $isValidUtf8 = (bool) preg_match('##u', $pattern);

    if ($isValidUtf8) {
        return;
    }

    throw new InvalidArgumentException('Pattern is not a valid UTF-8 string.');
}
669
|
|
|
} |
670
|
|
|
|