1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Yiisoft\Strings; |
6
|
|
|
|
7
|
|
|
use InvalidArgumentException; |
8
|
|
|
|
9
|
|
|
use function array_slice; |
10
|
|
|
use function count; |
11
|
|
|
use function function_exists; |
12
|
|
|
use function max; |
13
|
|
|
use function mb_strlen; |
14
|
|
|
use function mb_strtolower; |
15
|
|
|
use function mb_strtoupper; |
16
|
|
|
use function mb_substr; |
17
|
|
|
use function str_ends_with; |
18
|
|
|
use function str_starts_with; |
19
|
|
|
use function strlen; |
20
|
|
|
|
21
|
|
|
/**
 * Provides static methods to work with strings.
 */
final class StringHelper
{
    /**
     * Default content of the regular expression character class used by {@see StringHelper::trim()},
     * {@see StringHelper::ltrim()} and {@see StringHelper::rtrim()}: the Unicode general categories
     * "Other" (`\pC`) and "Separator" (`\pZ`).
     */
    public const DEFAULT_WHITESPACE_PATTERN = "\pC\pZ";

    /**
     * Returns the number of bytes in the given string.
     * The string is measured with {@see mb_strlen()} and the `8bit` encoding, so it is always treated
     * as a byte array. `null` is measured as an empty string.
     *
     * @param string|null $input The string being measured for length.
     *
     * @return int The number of bytes in the given string.
     */
    public static function byteLength(?string $input): int
    {
        return mb_strlen((string) $input, '8bit');
    }

    /**
     * Returns the portion of string specified by the start and length parameters.
     * This method ensures the string is treated as a byte array by using `mb_substr()` with the `8bit` encoding.
     *
     * @param string $input The input string.
     * @param int $start The starting position, in bytes.
     * @param int|null $length The desired portion length, in bytes. If not specified or `null`, there will be
     * no limit on length i.e. the output will be until the end of the string.
     *
     * @return string The extracted part of string (possibly an empty string).
     *
     * @see https://www.php.net/manual/en/function.substr.php
     */
    public static function byteSubstring(string $input, int $start, ?int $length = null): string
    {
        return mb_substr($input, $start, $length ?? mb_strlen($input, '8bit'), '8bit');
    }

    /**
     * Returns the trailing name component of a path.
     * This method is similar to the php function `basename()` except that it will
     * treat both \ and / as directory separators, independent of the operating system.
     * This method was mainly created to work on php namespaces. When working with real
     * file paths, PHP's `basename()` should work fine for you.
     * Note: this method is not aware of the actual filesystem, or path components such as "..".
     *
     * @param string $path A path string.
     * @param string $suffix If the path ends in suffix this will also be cut off.
     *
     * @return string The trailing name component of the given path.
     *
     * @see https://www.php.net/manual/en/function.basename.php
     */
    public static function baseName(string $path, string $suffix = ''): string
    {
        // The suffix is removed first, before trailing separators are trimmed.
        $length = mb_strlen($suffix);
        if ($length > 0 && mb_substr($path, -$length) === $suffix) {
            $path = mb_substr($path, 0, -$length);
        }

        // Normalize separators, then drop trailing ones so the last component is not empty.
        $path = rtrim(str_replace('\\', '/', $path), '/\\');
        $position = mb_strrpos($path, '/');
        if ($position !== false) {
            return mb_substr($path, $position + 1);
        }

        return $path;
    }

    /**
     * Returns parent directory's path.
     * This method is similar to `dirname()` except that it will treat
     * both \ and / as directory separators, independent of the operating system.
     *
     * @param string $path A path string.
     *
     * @return string The parent directory's path, or an empty string when the path contains no separator.
     *
     * @see https://www.php.net/manual/en/function.dirname.php
     */
    public static function directoryName(string $path): string
    {
        // Separators are normalized only to locate the last one; the returned prefix keeps
        // the separators of the original path.
        $position = mb_strrpos(str_replace('\\', '/', $path), '/');
        if ($position !== false) {
            return mb_substr($path, 0, $position);
        }

        return '';
    }

    /**
     * Get part of string.
     *
     * @param string $string To get substring from.
     * @param int $start Character to start at.
     * @param int|null $length Number of characters to get. `null` means "until the end of the string".
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @see https://php.net/manual/en/function.mb-substr.php
     *
     * @return string
     */
    public static function substring(
        string $string,
        int $start,
        ?int $length = null,
        string $encoding = 'UTF-8'
    ): string {
        return mb_substr($string, $start, $length, $encoding);
    }

    /**
     * Replace text within a portion of a string.
     *
     * @param string $string The input string.
     * @param string $replacement The replacement string.
     * @param int $start Position to begin replacing substring at.
     * If start is non-negative, the replacing will begin at the start'th offset into string.
     * If start is negative, the replacing will begin at the start'th character from the end of string.
     * @param int|null $length Length of the substring to be replaced.
     * If given and is positive, it represents the length of the portion of string which is to be replaced.
     * If it is negative, it represents the number of characters from the end of string at which to stop replacing.
     * If it is not given, then it will default to the length of the string; i.e. end the replacing at the end of string.
     * If length is zero then this function will have the effect of inserting replacement into string at the given start offset.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string
     */
    public static function replaceSubstring(
        string $string,
        string $replacement,
        int $start,
        ?int $length = null,
        string $encoding = 'UTF-8'
    ): string {
        $stringLength = mb_strlen($string, $encoding);

        // Clamp the start offset into the range [0, $stringLength].
        if ($start < 0) {
            $start = max(0, $stringLength + $start);
        } elseif ($start > $stringLength) {
            $start = $stringLength;
        }

        // Convert a negative ("stop N characters before the end") or missing length into a plain length.
        if ($length !== null && $length < 0) {
            $length = max(0, $stringLength - $start + $length);
        } elseif ($length === null || $length > $stringLength) {
            $length = $stringLength;
        }

        // The replaced range must not run past the end of the string.
        if (($start + $length) > $stringLength) {
            $length = $stringLength - $start;
        }

        return mb_substr($string, 0, $start, $encoding)
            . $replacement
            . mb_substr($string, $start + $length, $stringLength - $start - $length, $encoding);
    }

    /**
     * Check if given string starts with specified substring.
     * Binary and multibyte safe.
     *
     * @param string $input Input string.
     * @param string|null $with Part to search inside the $input. `null` always matches.
     *
     * @return bool Returns true if first input starts with second input, false otherwise.
     */
    public static function startsWith(string $input, ?string $with): bool
    {
        // This file already requires PHP 8 (union types), so the native function is always available.
        return $with === null || str_starts_with($input, $with);
    }

    /**
     * Check if given string starts with specified substring ignoring case.
     * Binary and multibyte safe.
     *
     * @param string $input Input string.
     * @param string|null $with Part to search inside the $input. `null` and an empty string always match.
     *
     * @return bool Returns true if first input starts with second input, false otherwise.
     */
    public static function startsWithIgnoringCase(string $input, ?string $with): bool
    {
        $bytes = self::byteLength($with);
        if ($with === null || $bytes === 0) {
            return true;
        }

        // Compare the lowercased leading bytes of the input with the lowercased needle.
        return self::lowercase(self::substring($input, 0, $bytes, '8bit')) === self::lowercase($with);
    }

    /**
     * Check if given string ends with specified substring.
     * Binary and multibyte safe.
     *
     * @param string $input Input string to check.
     * @param string|null $with Part to search inside of the $input. `null` always matches.
     *
     * @return bool Returns true if first input ends with second input, false otherwise.
     */
    public static function endsWith(string $input, ?string $with): bool
    {
        // This file already requires PHP 8 (union types), so the native function is always available.
        return $with === null || str_ends_with($input, $with);
    }

    /**
     * Check if given string ends with specified substring ignoring case.
     * Binary and multibyte safe.
     *
     * @param string $input Input string to check.
     * @param string|null $with Part to search inside of the $input. `null` and an empty string always match.
     *
     * @return bool Returns true if first input ends with second input, false otherwise.
     */
    public static function endsWithIgnoringCase(string $input, ?string $with): bool
    {
        $bytes = self::byteLength($with);
        if ($with === null || $bytes === 0) {
            return true;
        }

        // Compare the lowercased trailing bytes of the input with the lowercased needle.
        return self::lowercase(self::byteSubstring($input, -$bytes)) === self::lowercase($with);
    }

    /**
     * Truncates a string from the beginning to the number of characters specified.
     *
     * @param string $input String to process.
     * @param int $length Maximum length of the truncated string including trim marker.
     * @param string $trimMarker String to prepend to the beginning.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The input itself when it is not longer than $length, otherwise the trim marker
     * followed by the end of the input. The trim marker is never cut, so the result can exceed
     * $length when $length is shorter than the marker.
     */
    public static function truncateBegin(
        string $input,
        int $length,
        string $trimMarker = '…',
        string $encoding = 'UTF-8'
    ): string {
        $inputLength = mb_strlen($input, $encoding);

        if ($inputLength <= $length) {
            return $input;
        }

        // Number of trailing characters that still fit next to the marker (never negative).
        $keep = max(0, $length - mb_strlen($trimMarker, $encoding));

        return $trimMarker . mb_substr($input, $inputLength - $keep, null, $encoding);
    }

    /**
     * Truncates a string in the middle. Keeping start and end.
     * `StringHelper::truncateMiddle('Hello world number 2', 8)` produces "Hell…r 2".
     *
     * @param string $input The string to truncate.
     * @param int $length Maximum length of the truncated string including trim marker.
     * @param string $trimMarker String to insert in the middle of truncated string.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The truncated string. The trim marker is never cut, so the result can exceed
     * $length when $length is shorter than the marker.
     */
    public static function truncateMiddle(
        string $input,
        int $length,
        string $trimMarker = '…',
        string $encoding = 'UTF-8'
    ): string {
        $inputLength = mb_strlen($input, $encoding);

        if ($inputLength <= $length) {
            return $input;
        }

        // Characters of the input that fit around the marker; the head gets the extra one when odd.
        $available = max(0, $length - mb_strlen($trimMarker, $encoding));
        $start = (int) ceil($available / 2);
        $end = $available - $start;

        return mb_substr($input, 0, $start, $encoding)
            . $trimMarker
            . mb_substr($input, $inputLength - $end, null, $encoding);
    }

    /**
     * Truncates a string from the end to the number of characters specified.
     *
     * @param string $input The string to truncate.
     * @param int $length Maximum length of the truncated string including trim marker.
     * @param string $trimMarker String to append to the end of truncated string.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string The truncated string. Whitespace left at the end of the kept part is removed
     * before the marker is appended. The trim marker is never cut, so the result can exceed
     * $length when $length is shorter than the marker.
     */
    public static function truncateEnd(
        string $input,
        int $length,
        string $trimMarker = '…',
        string $encoding = 'UTF-8'
    ): string {
        $inputLength = mb_strlen($input, $encoding);

        if ($inputLength <= $length) {
            return $input;
        }

        // Number of leading characters that still fit next to the marker (never negative).
        $keep = max(0, $length - mb_strlen($trimMarker, $encoding));

        return rtrim(mb_substr($input, 0, $keep, $encoding)) . $trimMarker;
    }

    /**
     * Truncates a string to the number of words specified.
     *
     * @param string $input The string to truncate.
     * @param int $count How many words from original string to include into truncated string.
     * Zero or a negative number keeps no words.
     * @param string $trimMarker String to append to the end of truncated string.
     *
     * @return string The truncated string, or the unchanged input when it has no more than $count words.
     */
    public static function truncateWords(string $input, int $count, string $trimMarker = '…'): string
    {
        // Words and the whitespace runs between them alternate in the resulting list.
        /** @var string[] $words */
        $words = preg_split('/(\s+)/u', trim($input), -1, PREG_SPLIT_DELIM_CAPTURE);
        if (count($words) / 2 > $count) {
            // $count words plus the ($count - 1) separators between them; clamped so that a
            // non-positive $count keeps nothing instead of being read as an offset from the end.
            $words = array_slice($words, 0, max(0, ($count * 2) - 1));
            return implode('', $words) . $trimMarker;
        }

        return $input;
    }

    /**
     * Get string length.
     *
     * @param string $string String to calculate length for.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @see https://php.net/manual/en/function.mb-strlen.php
     *
     * @return int
     */
    public static function length(string $string, string $encoding = 'UTF-8'): int
    {
        return mb_strlen($string, $encoding);
    }

    /**
     * Counts words in a string. Words are the non-empty parts between runs of whitespace.
     *
     * @param string $input
     *
     * @return int
     */
    public static function countWords(string $input): int
    {
        return count(preg_split('/\s+/u', $input, -1, PREG_SPLIT_NO_EMPTY));
    }

    /**
     * Make a string lowercase.
     *
     * @param string $string String to process.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @see https://php.net/manual/en/function.mb-strtolower.php
     *
     * @return string
     */
    public static function lowercase(string $string, string $encoding = 'UTF-8'): string
    {
        return mb_strtolower($string, $encoding);
    }

    /**
     * Make a string uppercase.
     *
     * @param string $string String to process.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @see https://php.net/manual/en/function.mb-strtoupper.php
     *
     * @return string
     */
    public static function uppercase(string $string, string $encoding = 'UTF-8'): string
    {
        return mb_strtoupper($string, $encoding);
    }

    /**
     * Make a string's first character uppercase.
     *
     * @param string $string The string to be processed.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @return string
     *
     * @see https://php.net/manual/en/function.ucfirst.php
     */
    public static function uppercaseFirstCharacter(string $string, string $encoding = 'UTF-8'): string
    {
        $firstCharacter = self::substring($string, 0, 1, $encoding);
        $rest = self::substring($string, 1, null, $encoding);

        return self::uppercase($firstCharacter, $encoding) . $rest;
    }

    /**
     * Uppercase the first character of each word in a string.
     * Words are the non-empty parts between whitespace characters; they are joined back
     * with a single space.
     *
     * @param string $string The string to be processed.
     * @param string $encoding The encoding to use, defaults to "UTF-8".
     *
     * @see https://php.net/manual/en/function.ucwords.php
     *
     * @return string
     */
    public static function uppercaseFirstCharacterInEachWord(string $string, string $encoding = 'UTF-8'): string
    {
        $words = preg_split('/\s/u', $string, -1, PREG_SPLIT_NO_EMPTY);

        $wordsWithUppercaseFirstCharacter = array_map(
            static fn (string $word): string => self::uppercaseFirstCharacter($word, $encoding),
            $words
        );

        return implode(' ', $wordsWithUppercaseFirstCharacter);
    }

    /**
     * Encodes string into "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
     *
     * > Note: Base 64 padding `=` may be at the end of the returned string.
     * > `=` is not transparent to URL encoding.
     *
     * @see https://tools.ietf.org/html/rfc4648#page-7
     *
     * @param string $input The string to encode.
     *
     * @return string Encoded string.
     */
    public static function base64UrlEncode(string $input): string
    {
        return strtr(base64_encode($input), '+/', '-_');
    }

    /**
     * Decodes "Base 64 Encoding with URL and Filename Safe Alphabet" (RFC 4648).
     *
     * @see https://tools.ietf.org/html/rfc4648#page-7
     *
     * @param string $input Encoded string.
     *
     * @return string Decoded string.
     */
    public static function base64UrlDecode(string $input): string
    {
        return base64_decode(strtr($input, '-_', '+/'));
    }

    /**
     * Split a string to array with non-empty lines.
     * Whitespace from the beginning and end of each line will be stripped.
     *
     * @param string $string The input string.
     * @param string $separator The boundary string. It is a part of regular expression
     * so should be taken into account or properly escaped with {@see preg_quote()}.
     *
     * @return array
     */
    public static function split(string $string, string $separator = '\R'): array
    {
        // Strip leading and trailing whitespace of the whole input first.
        $string = preg_replace('(^\s*|\s*$)', '', $string);

        // Whitespace around every separator is consumed together with the separator itself.
        return preg_split('~\s*' . $separator . '\s*~u', $string, -1, PREG_SPLIT_NO_EMPTY);
    }

    /**
     * Splits a path string into its items by a single-character delimiter that can be escaped.
     *
     * @param string $path The path to parse. Each key should be separated by delimiter. If a path item
     * contains delimiter, it can be escaped with "\" (backslash) or a custom delimiter can be used.
     * An empty path gives an empty array.
     * @param string $delimiter A separator between path items. Must be exactly one byte long. Defaults
     * to "." (dot).
     * @param string $escapeCharacter An escape character, used to escape delimiter. Must be exactly one
     * byte long and differ from the delimiter. Defaults to "\" (backslash).
     * @param bool $preserveDelimiterEscaping Whether to preserve delimiter escaping in the items of final array.
     * When `false`, an escaped escape character becomes a single escape character and an escaped delimiter
     * becomes a plain delimiter. Defaults to `false`.
     *
     * @throws InvalidArgumentException If the delimiter or the escape character is not exactly one byte
     * long, or if they are equal.
     *
     * @return string[]
     *
     * @psalm-return list<string>
     */
    public static function parsePath(
        string $path,
        string $delimiter = '.',
        string $escapeCharacter = '\\',
        bool $preserveDelimiterEscaping = false
    ): array {
        if (strlen($delimiter) !== 1) {
            throw new InvalidArgumentException('Only 1 character is allowed for delimiter.');
        }

        if (strlen($escapeCharacter) !== 1) {
            throw new InvalidArgumentException('Only 1 escape character is allowed.');
        }

        if ($delimiter === $escapeCharacter) {
            throw new InvalidArgumentException('Delimiter and escape character must be different.');
        }

        if ($path === '') {
            return [];
        }

        // Split on every delimiter preceded by an even number (possibly zero) of escape characters.
        // The escape characters matched in front of such a delimiter belong to the match and are
        // therefore missing from the pieces; byte offsets are captured to restore them below.
        /** @psalm-var non-empty-list<array{0:string, 1:int}> $matches */
        $matches = preg_split(
            sprintf(
                '/(?<!%1$s)((?>%1$s%1$s)*)%2$s/',
                preg_quote($escapeCharacter, '/'),
                preg_quote($delimiter, '/')
            ),
            $path,
            -1,
            PREG_SPLIT_OFFSET_CAPTURE
        );
        $result = [];
        $countResults = count($matches);
        for ($i = 1; $i < $countResults; $i++) {
            // Bytes between the end of the previous piece and the start of this one, minus the
            // delimiter itself: the escape characters swallowed by the split pattern.
            $l = $matches[$i][1] - $matches[$i - 1][1] - strlen($matches[$i - 1][0]) - 1;
            $result[] = $matches[$i - 1][0] . ($l > 0 ? str_repeat($escapeCharacter, $l) : '');
        }
        $result[] = $matches[$countResults - 1][0];

        if ($preserveDelimiterEscaping === true) {
            return $result;
        }

        return array_map(
            static function (string $key) use ($delimiter, $escapeCharacter): string {
                return str_replace(
                    [
                        $escapeCharacter . $escapeCharacter,
                        $escapeCharacter . $delimiter,
                    ],
                    [
                        $escapeCharacter,
                        $delimiter,
                    ],
                    $key
                );
            },
            $result
        );
    }

    /**
     * Strip Unicode whitespace (with property White_Space=yes) or other characters from the beginning and end of a string.
     * Input string and pattern are treated as UTF-8.
     *
     * @see https://en.wikipedia.org/wiki/Whitespace_character#Unicode
     * @see https://www.php.net/manual/function.preg-replace
     *
     * @param string|string[] $string The string or an array with strings.
     * @param string $pattern PCRE regex pattern to search for, as UTF-8 string. It is placed inside a character
     * class. Quote $pattern if it contains special regular expression characters.
     *
     * @see https://www.php.net/manual/function.preg-quote.php
     *
     * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
     *
     * @return array|string
     */
    public static function trim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
    {
        self::ensureUTF8Pattern($pattern);

        return preg_replace("#^[$pattern]+|[$pattern]+$#uD", '', $string);
    }

    /**
     * Strip Unicode whitespace (with property White_Space=yes) or other characters from the beginning of a string.
     *
     * {@see self::trim()}
     *
     * @param string|string[] $string The string or an array with strings.
     * @param string $pattern PCRE regex pattern to search for, as UTF-8 string.
     *
     * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
     *
     * @return array|string
     */
    public static function ltrim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
    {
        self::ensureUTF8Pattern($pattern);

        return preg_replace("#^[$pattern]+#uD", '', $string);
    }

    /**
     * Strip Unicode whitespace (with property White_Space=yes) or other characters from the end of a string.
     *
     * {@see self::trim()}
     *
     * @param string|string[] $string The string or an array with strings.
     * @param string $pattern PCRE regex pattern to search for, as UTF-8 string.
     *
     * @throws InvalidArgumentException If the pattern is not a valid UTF-8 string.
     *
     * @return array|string
     */
    public static function rtrim(string|array $string, string $pattern = self::DEFAULT_WHITESPACE_PATTERN): string|array
    {
        self::ensureUTF8Pattern($pattern);

        return preg_replace("#[$pattern]+$#uD", '', $string);
    }

    /**
     * Ensure the input string is a valid UTF-8 string.
     *
     * @param string $pattern The input string.
     *
     * @throws InvalidArgumentException If the string is not valid UTF-8.
     */
    private static function ensureUTF8Pattern(string $pattern): void
    {
        // An empty pattern in UTF-8 mode matches any valid subject, so a falsy result
        // means the subject could not be processed as UTF-8.
        if (!preg_match('##u', $pattern)) {
            throw new InvalidArgumentException('Pattern is not a valid UTF-8 string.');
        }
    }
}
651
|
|
|
|