Passed
Push — master ( d50de1...0381b8 )
by Lars
09:55 queued 01:18
created

UTF8::strtr()   B

Complexity

Conditions 11
Paths 52

Size

Total Lines 42
Code Lines 22

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 11.307

Importance

Changes 3
Bugs 1 Features 0
Metric Value
cc 11
eloc 22
c 3
b 1
f 0
nc 52
nop 3
dl 0
loc 42
ccs 19
cts 22
cp 0.8636
crap 11.307
rs 7.3166

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include: Extract Method.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace voku\helper;
6
7
/**
8
 * @psalm-immutable
9
 */
10
final class UTF8
11
{
12
    /**
13
     * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
14
     * This regular expression is a work around for http://bugs.exim.org/1279
15
     *
16
     * @deprecated <p>please don't use it anymore</p>
17
     */
18
    const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])";
19
20
    /**
21
     * Bom => Byte-Length
22
     *
23
     * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
24
     *
25
     * @var array<string, int>
26
     */
27
    private static $BOM = [
28
        "\xef\xbb\xbf"     => 3, // UTF-8 BOM
29
        ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
30
        "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
31
        '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
32
        "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
33
        'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
34
        "\xfe\xff"         => 2, // UTF-16 (BE) BOM
35
        'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
36
        "\xff\xfe"         => 2, // UTF-16 (LE) BOM
37
        'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
38
    ];
39
40
    /**
41
     * Numeric code point => UTF-8 Character
42
     *
43
     * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
44
     *
45
     * @var array<int, string>
46
     */
47
    private static $WHITESPACE = [
48
        // NULL Byte
49
        0 => "\x0",
50
        // Tab
51
        9 => "\x9",
52
        // New Line
53
        10 => "\xa",
54
        // Vertical Tab
55
        11 => "\xb",
56
        // Carriage Return
57
        13 => "\xd",
58
        // Ordinary Space
59
        32 => "\x20",
60
        // NO-BREAK SPACE
61
        160 => "\xc2\xa0",
62
        // OGHAM SPACE MARK
63
        5760 => "\xe1\x9a\x80",
64
        // MONGOLIAN VOWEL SEPARATOR
65
        6158 => "\xe1\xa0\x8e",
66
        // EN QUAD
67
        8192 => "\xe2\x80\x80",
68
        // EM QUAD
69
        8193 => "\xe2\x80\x81",
70
        // EN SPACE
71
        8194 => "\xe2\x80\x82",
72
        // EM SPACE
73
        8195 => "\xe2\x80\x83",
74
        // THREE-PER-EM SPACE
75
        8196 => "\xe2\x80\x84",
76
        // FOUR-PER-EM SPACE
77
        8197 => "\xe2\x80\x85",
78
        // SIX-PER-EM SPACE
79
        8198 => "\xe2\x80\x86",
80
        // FIGURE SPACE
81
        8199 => "\xe2\x80\x87",
82
        // PUNCTUATION SPACE
83
        8200 => "\xe2\x80\x88",
84
        // THIN SPACE
85
        8201 => "\xe2\x80\x89",
86
        // HAIR SPACE
87
        8202 => "\xe2\x80\x8a",
88
        // LINE SEPARATOR
89
        8232 => "\xe2\x80\xa8",
90
        // PARAGRAPH SEPARATOR
91
        8233 => "\xe2\x80\xa9",
92
        // NARROW NO-BREAK SPACE
93
        8239 => "\xe2\x80\xaf",
94
        // MEDIUM MATHEMATICAL SPACE
95
        8287 => "\xe2\x81\x9f",
96
        // HALFWIDTH HANGUL FILLER
97
        65440 => "\xef\xbe\xa0",
98
        // IDEOGRAPHIC SPACE
99
        12288 => "\xe3\x80\x80",
100
    ];
101
102
    /**
103
     * @var array<string, string>
104
     */
105
    private static $WHITESPACE_TABLE = [
106
        'SPACE'                     => "\x20",
107
        'NO-BREAK SPACE'            => "\xc2\xa0",
108
        'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
109
        'EN QUAD'                   => "\xe2\x80\x80",
110
        'EM QUAD'                   => "\xe2\x80\x81",
111
        'EN SPACE'                  => "\xe2\x80\x82",
112
        'EM SPACE'                  => "\xe2\x80\x83",
113
        'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
114
        'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
115
        'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
116
        'FIGURE SPACE'              => "\xe2\x80\x87",
117
        'PUNCTUATION SPACE'         => "\xe2\x80\x88",
118
        'THIN SPACE'                => "\xe2\x80\x89",
119
        'HAIR SPACE'                => "\xe2\x80\x8a",
120
        'LINE SEPARATOR'            => "\xe2\x80\xa8",
121
        'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
122
        'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
123
        'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
124
        'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
125
        'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
126
        'HALFWIDTH HANGUL FILLER'   => "\xef\xbe\xa0",
127
    ];
128
129
    /**
130
     * @var array
131
     *
132
     * @psalm-var array{upper: string[], lower: string[]}
133
     */
134
    private static $COMMON_CASE_FOLD = [
135
        'upper' => [
136
            'µ',
137
            'ſ',
138
            "\xCD\x85",
139
            'ς',
140
            'ẞ',
141
            "\xCF\x90",
142
            "\xCF\x91",
143
            "\xCF\x95",
144
            "\xCF\x96",
145
            "\xCF\xB0",
146
            "\xCF\xB1",
147
            "\xCF\xB5",
148
            "\xE1\xBA\x9B",
149
            "\xE1\xBE\xBE",
150
        ],
151
        'lower' => [
152
            'μ',
153
            's',
154
            'ι',
155
            'σ',
156
            'ß',
157
            'β',
158
            'θ',
159
            'φ',
160
            'π',
161
            'κ',
162
            'ρ',
163
            'ε',
164
            "\xE1\xB9\xA1",
165
            'ι',
166
        ],
167
    ];
168
169
    /**
170
     * @var array
171
     *
172
     * @psalm-var array<string, mixed>
173
     */
174
    private static $SUPPORT = [];
175
176
    /**
177
     * @var string[]|null
178
     *
179
     * @psalm-var array<string, string>|null
180
     */
181
    private static $BROKEN_UTF8_FIX;
182
183
    /**
184
     * @var string[]|null
185
     *
186
     * @psalm-var array<int, string>|null
187
     */
188
    private static $WIN1252_TO_UTF8;
189
190
    /**
191
     * @var string[]|null
192
     *
193
     * @psalm-var array<int ,string>|null
194
     */
195
    private static $INTL_TRANSLITERATOR_LIST;
196
197
    /**
198
     * @var string[]|null
199
     *
200
     * @psalm-var array<string>|null
201
     */
202
    private static $ENCODINGS;
203
204
    /**
205
     * @var int[]|null
206
     *
207
     * @psalm-var array<string ,int>|null
208
     */
209
    private static $ORD;
210
211
    /**
212
     * @var string[]|null
213
     *
214
     * @psalm-var array<string, string>|null
215
     */
216
    private static $EMOJI;
217
218
    /**
219
     * @var string[]|null
220
     *
221
     * @psalm-var array<string>|null
222
     */
223
    private static $EMOJI_VALUES_CACHE;
224
225
    /**
226
     * @var string[]|null
227
     *
228
     * @psalm-var array<string>|null
229
     */
230
    private static $EMOJI_KEYS_CACHE;
231
232
    /**
233
     * @var string[]|null
234
     *
235
     * @psalm-var array<string>|null
236
     */
237
    private static $EMOJI_KEYS_REVERSIBLE_CACHE;
238
239
    /**
240
     * @var string[]|null
241
     *
242
     * @psalm-var array<int, string>|null
243
     */
244
    private static $CHR;
245
246
    /**
247
     * __construct()
248
     */
249 34
    public function __construct()
    {
        // Intentionally empty: the constructor performs no initialisation.
    }
252
253
    /**
254
     * Return the character at the specified position: $str[1] like functionality.
255
     *
256
     * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
257
     *
258
     * @param string $str      <p>A UTF-8 string.</p>
259
     * @param int    $pos      <p>The position of character to return.</p>
260
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
261
     *
262
     * @psalm-pure
263
     *
264
     * @return string
265
     *                <p>Single multi-byte character.</p>
266
     */
267 3
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // An empty string or a negative position cannot address a character.
        if ($str === '' || $pos < 0) {
            return '';
        }

        // UTF-8: call mb_substr() directly (it then uses mbstring's internal encoding).
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, $pos, 1);
        }

        // Any other encoding is delegated to the class' own substr().
        return (string) self::substr($str, $pos, 1, $encoding);
    }
279
280
    /**
281
     * Prepends UTF-8 BOM character to the string and returns the whole string.
282
     *
283
     * INFO: If BOM already existed there, the Input string is returned.
284
     *
285
     * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'</code>
286
     *
287
     * @param string $str <p>The input string.</p>
288
     *
289
     * @psalm-pure
290
     *
291
     * @return string
292
     *                <p>The output string that contains BOM.</p>
293
     */
294 2
    public static function add_bom_to_string(string $str): string
    {
        // Return the input untouched when string_has_bom() already reports a BOM.
        if (self::string_has_bom($str)) {
            return $str;
        }

        // Otherwise prefix the UTF-8 BOM bytes returned by bom().
        return self::bom() . $str;
    }
302
303
    /**
304
     * Changes all keys in an array.
305
     *
306
     * @param array<string, mixed> $array    <p>The array to work on</p>
307
     * @param int                  $case     [optional] <p> Either <strong>CASE_UPPER</strong><br>
308
     *                                       or <strong>CASE_LOWER</strong> (default)</p>
309
     * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
310
     *
311
     * @psalm-pure
312
     *
313
     * @return string[]
314
     *                  <p>An array with its keys lower- or uppercased.</p>
315
     */
316 2
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Anything other than CASE_UPPER is treated as the default, CASE_LOWER.
        if ($case !== \CASE_UPPER) {
            $case = \CASE_LOWER;
        }

        $return = [];

        // Iterate by value: the values are only copied into the result, so a
        // by-reference loop would just leave a dangling reference behind.
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            // Keys that collide after case conversion overwrite earlier entries.
            $return[$key] = $value;
        }

        return $return;
    }
340
341
    /**
342
     * Returns the substring between $start and $end, if found, or an empty
343
     * string. An optional offset may be supplied from which to begin the
344
     * search for the start string.
345
     *
346
     * @param string $str
347
     * @param string $start    <p>Delimiter marking the start of the substring.</p>
348
     * @param string $end      <p>Delimiter marking the end of the substring.</p>
349
     * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
350
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
351
     *
352
     * @psalm-pure
353
     *
354
     * @return string
355
     */
356 16
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // UTF-8: use the mbstring functions directly.
            $start_position = \mb_strpos($str, $start, $offset);
            if ($start_position === false) {
                return '';
            }

            // Index of the first character after the start delimiter.
            $substr_index = $start_position + (int) \mb_strlen($start);
            $end_position = \mb_strpos($str, $end, $substr_index);

            // No end delimiter, or nothing between the two delimiters.
            if ($end_position === false || $end_position === $substr_index) {
                return '';
            }

            return (string) \mb_substr($str, $substr_index, $end_position - $substr_index);
        }

        // Other encodings: same algorithm via the class' encoding-aware helpers.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $start_position = self::strpos($str, $start, $offset, $encoding);
        if ($start_position === false) {
            return '';
        }

        $substr_index = $start_position + (int) self::strlen($start, $encoding);
        $end_position = self::strpos($str, $end, $substr_index, $encoding);

        if ($end_position === false || $end_position === $substr_index) {
            return '';
        }

        return (string) self::substr(
            $str,
            $substr_index,
            $end_position - $substr_index,
            $encoding
        );
    }
406
407
    /**
408
     * Convert binary into a string.
409
     *
410
     * INFO: opposite to UTF8::str_to_binary()
411
     *
412
     * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
413
     *
414
     * @param string $bin 1|0
415
     *
416
     * @psalm-pure
417
     *
418
     * @return string
419
     */
420 2
    public static function binary_to_str($bin): string
    {
        // Non-string and empty input yields an empty result.
        if (!\is_string($bin) || $bin === '') {
            return '';
        }

        // Keep only binary digits and drop leading zeros, so the input is still
        // read as one big number; a value of zero yields ''.
        $bits = \ltrim((string) \preg_replace('/[^01]/', '', $bin), '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to whole bytes. Decoding 8 bits at a time avoids the
        // precision loss of base_convert() on long inputs and the misaligned
        // nibbles that pack('H*') produces for odd-length hex strings.
        $padded_length = (int) (\ceil(\strlen($bits) / 8) * 8);
        $bits = \str_pad($bits, $padded_length, '0', \STR_PAD_LEFT);

        $str = '';
        foreach (\str_split($bits, 8) as $byte) {
            $str .= \chr((int) \bindec($byte));
        }

        return $str;
    }
433
434
    /**
435
     * Returns the UTF-8 Byte Order Mark Character.
436
     *
437
     * INFO: take a look at UTF8::$bom for e.g. UTF-16 and UTF-32 BOM values
438
     *
439
     * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
440
     *
441
     * @psalm-pure
442
     *
443
     * @return string
444
     *                <p>UTF-8 Byte Order Mark.</p>
445
     */
446 4
    public static function bom(): string
    {
        // The bytes EF BB BF are the UTF-8 encoding of U+FEFF.
        return "\xef\xbb\xbf";
    }
450
451
    /**
452
     * @alias of UTF8::chr_map()
453
     *
454
     * @param callable $callback
455
     * @param string   $str
456
     *
457
     * @psalm-pure
458
     *
459
     * @return string[]
460
     *
461
     * @see   UTF8::chr_map()
462
     */
463 2
    public static function callback($callback, string $str): array
    {
        // Pure alias: all work is delegated to chr_map().
        return self::chr_map($callback, $str);
    }
467
468
    /**
469
     * Returns the character at $index, with indexes starting at 0.
470
     *
471
     * @param string $str      <p>The input string.</p>
472
     * @param int    $index    <p>Position of the character.</p>
473
     * @param string $encoding [optional] <p>Default is UTF-8</p>
474
     *
475
     * @psalm-pure
476
     *
477
     * @return string
478
     *                <p>The character at $index.</p>
479
     */
480 9
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // No range guard here: $index is passed straight through to the
        // substring call, so negative values follow its semantics.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, $index, 1);
        }

        return (string) self::substr($str, $index, 1, $encoding);
    }
488
489
    /**
490
     * Returns an array consisting of the characters in the string.
491
     *
492
     * @param string $str <p>The input string.</p>
493
     *
494
     * @psalm-pure
495
     *
496
     * @return string[]
497
     *                  <p>An array of chars.</p>
498
     */
499 3
    public static function chars(string $str): array
    {
        // NOTE(review): static analysis reports str_split() as returning values
        // of type array|string[], which is why the result is annotated here.
        /** @var string[] */
        return self::str_split($str);
    }
504
505
    /**
506
     * This method will auto-detect your server environment for UTF-8 support.
507
     *
508
     * @return true|null
509
     *
510
     * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
511
     */
512 5
    public static function checkForSupport()
    {
        // The probe runs only once; every later call reports null.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();
        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            // Switch mbstring (and its regex engine) to UTF-8 and record that.
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            // With the polyfill in use, mb_internal_encoding() is set as well.
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }
560
561
    /**
562
     * Generates a UTF-8 encoded character from the given code point.
563
     *
564
     * INFO: opposite to UTF8::ord()
565
     *
566
     * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
567
     *
568
     * @param int    $code_point <p>The code point for which to generate a character.</p>
569
     * @param string $encoding   [optional] <p>Default is UTF-8</p>
570
     *
571
     * @psalm-pure
572
     *
573
     * @return string|null
574
     *                     <p>Multi-byte character, returns null on failure or empty input.</p>
575
     */
576 21
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * Results are memoised per "<code point>_<encoding>".
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Zero and negative values are rejected.
        if ($code_point <= 0) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Only code points below 0x80 are a single byte in UTF-8. self::$CHR is
        // used as a byte table by the fallback below, so entry 0x80 is a lone
        // continuation byte and must not be returned for U+0080.
        if ($code_point < 0x80) { // only for "simple"-chars
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            // IntlChar::chr() yields null for invalid input; report failure
            // instead of handing null to encode().
            if ($chr === null) {
                return null;
            }

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $code_point = (int) $code_point;

        // Unicode ends at U+10FFFF; larger values cannot be encoded as UTF-8.
        if ($code_point > 0x10FFFF) {
            return null;
        }

        if ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }
686
687
    /**
688
     * Applies callback to all characters of a string.
689
     *
690
     * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
691
     *
692
     * @param callable $callback <p>The callback function.</p>
693
     * @param string   $str      <p>UTF-8 string to run callback on.</p>
694
     *
695
     * @psalm-pure
696
     *
697
     * @return string[]
698
     *                  <p>The outcome of the callback, as array.</p>
699
     */
700 2
    public static function chr_map($callback, string $str): array
    {
        // Split into characters first, then run the callback once per character.
        return \array_map(
            $callback,
            self::str_split($str)
        );
    }
707
708
    /**
709
     * Generates an array of byte length of each character of a Unicode string.
710
     *
711
     * 1 byte => U+0000  - U+007F
712
     * 2 byte => U+0080  - U+07FF
713
     * 3 byte => U+0800  - U+FFFF
714
     * 4 byte => U+10000 - U+10FFFF
715
     *
716
     * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
717
     *
718
     * @param string $str <p>The original unicode string.</p>
719
     *
720
     * @psalm-pure
721
     *
722
     * @return int[]
723
     *               <p>An array of byte lengths of each character.</p>
724
     */
725 4
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // With function overloading active, measure bytes through an 8-bit
            // encoding instead of calling strlen().
            return \array_map(
                static function (string $data): int {
                    // "mb_" is available if overload is used, so use it ...
                    return \mb_strlen($data, 'CP850'); // 8-BIT
                },
                self::str_split($str)
            );
        }

        // One strlen() call per character gives its byte length.
        return \array_map('\strlen', self::str_split($str));
    }
743
744
    /**
745
     * Get a decimal code representation of a specific character.
746
     *
747
     * INFO: opposite to UTF8::decimal_to_chr()
748
     *
749
     * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
750
     *
751
     * @param string $char <p>The input character.</p>
752
     *
753
     * @psalm-pure
754
     *
755
     * @return int
756
     */
757 5
    public static function chr_to_decimal(string $char): int
    {
        // An empty string has no first byte to decode; report 0 rather than
        // failing in unpack() or on the $char[0] access below.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            // UCS-4LE stores each character as a 32-bit little-endian integer,
            // which unpack('V') reads directly.
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                /** @noinspection OffsetOperationsInspection */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        // Manual decoding: the lead byte gives the sequence length.
        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // Append the low 6 payload bits of every continuation byte.
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }
796
797
    /**
798
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
799
     *
800
     * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
801
     *
802
     * @param int|string $char   <p>The input character</p>
803
     * @param string     $prefix [optional]
804
     *
805
     * @psalm-pure
806
     *
807
     * @return string
808
     *                <p>The code point encoded as U+xxxx.</p>
809
     */
810 2
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // The HTML entity for the NUL character is converted via an empty
        // string handed to ord().
        if ($char === '&#0;') {
            $char = '';
        }

        return self::int_to_hex(self::ord((string) $char), $prefix);
    }
822
823
    /**
824
     * alias for "UTF8::chr_to_decimal()"
825
     *
826
     * @param string $chr
827
     *
828
     * @psalm-pure
829
     *
830
     * @return int
831
     *
832
     * @see        UTF8::chr_to_decimal()
833
     * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
834
     */
835 2
    public static function chr_to_int(string $chr): int
    {
        // Deprecated alias: all work is delegated to chr_to_decimal().
        return self::chr_to_decimal($chr);
    }
839
840
    /**
841
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
842
     *
843
     * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
844
     *
845
     * @param string $body         <p>The original string to be split.</p>
846
     * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
847
     * @param string $end          [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
848
     *
849
     * @psalm-pure
850
     *
851
     * @return string
852
     *                <p>The chunked string.</p>
853
     */
854 4
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        // $end is used as glue between chunks, so it is not appended after
        // the final chunk.
        return \implode($end, self::str_split($body, $chunk_length));
    }
858
859
    /**
860
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
861
     *
862
     * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
863
     *
864
     * @param string $str                                     <p>The string to be sanitized.</p>
865
     * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
866
     *                                                        UTF-BOM.</p>
867
     * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
868
     *                                                        whitespace.</p>
869
     * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
870
     *                                                        Word chars e.g.: "…"
871
     *                                                        => "..."</p>
872
     * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
873
     *                                                        in
874
     *                                                        combination with
875
     *                                                        $normalize_whitespace</p>
876
     * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
877
     *                                                        question mark e.g.: "�"</p>
878
     * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you not want to remove
879
     *                                                        invisible characters e.g.: "\0"</p>
880
     * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you not want to remove
881
     *                                                        invisible url encoded characters e.g.: "%0B"<br> WARNING:
882
     *                                                        maybe contains false-positives e.g. aa%0Baa -> aaaa.
883
     *                                                        </p>
884
     *
885
     * @psalm-pure
886
     *
887
     * @return string
888
     *                <p>A clean UTF-8 encoded string.</p>
889
     *
890
     * @noinspection PhpTooManyParametersInspection
891
     */
892 89
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // Group 1 captures runs of well-formed byte sequences, groups 2 and 3
        // each match one stray byte. Replacing every match with "$1" therefore
        // keeps the well-formed runs and drops the stray bytes.
        //
        // Based on http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // which caused connection reset problems on larger strings; the {1,100}
        // bound is presumably the mitigation for that.
        $byte_sequence_pattern = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $cleaned = (string) \preg_replace($byte_sequence_pattern, '$1', $str);

        // The optional steps below run in a fixed order; each one is skipped
        // unless its flag is set.
        if ($replace_diamond_question_mark) {
            $cleaned = self::replace_diamond_question_mark($cleaned);
        }

        if ($remove_invisible_characters) {
            $cleaned = self::remove_invisible_characters($cleaned, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $cleaned = self::normalize_msword($cleaned);
        }

        if ($remove_bom) {
            $cleaned = self::remove_bom($cleaned);
        }

        return $cleaned;
    }
941
942
    /**
     * Repair simple encoding mistakes first and then sanitize the string via UTF8::clean().
     *
     * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'</code>
     *
     * @param string $str <p>The input string (it is cast to string before use).</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The cleaned string, or an empty string for empty input.</p>
     */
    public static function cleanup($str): string
    {
        $input = (string) $str;

        if ($input === '') {
            return '';
        }

        // 1. fix ISO <-> UTF-8 errors
        // 2. hand over to clean(), which is asked to:
        //    - remove the BOM
        //    - normalize whitespace chars (but keep non-breaking-spaces)
        //    - leave MS Word chars untouched
        //    - remove the diamond question mark (�)
        //    invisible characters (e.g. "\0") are removed by clean()'s default
        return self::clean(
            self::fix_simple_utf8($input),
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space
            true   // $replace_diamond_question_mark
        );
    }
979
980
    /**
     * Convert a string (or an array of single characters) into a list of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>
     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     * </code>
     *
     * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
     * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
     *                                     default, code points will be returned as integers.</p>
     *
     * @psalm-pure
     *
     * @return int[]|string[]
     *                        <p>
     *                        The array of code points:<br>
     *                        int[] for $use_u_style === false<br>
     *                        string[] for $use_u_style === true<br>
     *                        An empty array is returned for an empty array and for
     *                        any input that is neither a string nor an array.
     *                        </p>
     */
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A string is split into its characters first; arrays are used as given.
        $chars = \is_string($arg) ? self::str_split($arg) : $arg;

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($chars) || $chars === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $chars);

        if (!$use_u_style) {
            return $code_points;
        }

        // second pass: integer code point -> "U+xxxx" notation
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }
1041
1042
    /**
     * Replace every run of whitespace characters with one single space and
     * trim the result. According to the original documentation this covers
     * tabs and newlines as well as multibyte whitespace such as the thin
     * space and the ideographic space.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with trimmed $str and condensed whitespace.</p>
     */
    public static function collapse_whitespace(string $str): string
    {
        // without mbstring: use the library's own regex helper
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $condensed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($condensed);
    }
1063
1064
    /**
     * Count how often each character occurs in a string.
     *
     * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
     *
     * @param string $str                     <p>The input string.</p>
     * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                        "mb_*" functions; the flag is forwarded to UTF8::str_split().</p>
     *
     * @psalm-pure
     *
     * @return int[]
     *               <p>An associative array of Character as keys and
     *               their count as values.</p>
     */
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // split into chunks of length 1, then tally identical chunks
        $chars = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($chars);
    }
1093
1094
    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
     *
     * copy&paste from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
     * @param string[] $filter     <p>Map of "search => replacement" pairs applied before invalid characters are stripped.</p>
     * @param bool     $strip_tags <p>If true, the string is passed through strip_tags().</p>
     * @param bool     $strtolower <p>If true, the string is passed through strtolower().</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-param array<string,string> $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // Fallback: a blank input gets an auto-generated identifier, everything
        // else is sanitized first.
        $identifier = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $identifier = \strip_tags($identifier);
        }

        if ($strtolower) {
            $identifier = \strtolower($identifier);
        }

        // We could also use strtr() here but its much slower than str_replace().
        // To let '__' survive the filter, it is swapped for the placeholder '##'
        // first - unless the caller defined '__' as a filter key.
        $placeholder_count = 0;
        if (!isset($filter['__'])) {
            $identifier = \str_replace('__', '##', $identifier, $placeholder_count);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $identifier = \str_replace(\array_keys($filter), \array_values($filter), $identifier);

        // Swap the placeholder back only if the string really contained '__'.
        if ($placeholder_count > 0) {
            $identifier = \str_replace('##', '__', $identifier);
        }

        // Valid characters in a CSS identifier are:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 and higher
        // We strip out any character not in the above list.
        // NOTE(review): the character class below stops at U+FFFF, so code
        // points above the BMP are stripped as well - confirm this is intended.
        $identifier = (string) \preg_replace(
            '/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u',
            '',
            $identifier
        );

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $identifier = (string) \preg_replace(
            ['/^[0-9]/', '/^(-[0-9])|^(--)/'],
            ['_', '__'],
            $identifier
        );

        return \trim($identifier, '-');
    }
1169
1170
    /**
     * Strip "@media ... { ... }" blocks from a CSS string.
     *
     * @param string $str <p>The CSS source.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The CSS without the matched media-query blocks.</p>
     */
    public static function css_stripe_media_queries(string $str): string
    {
        // flags: case-insensitive, dot matches newline, UTF-8, multiline, ungreedy
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        return (string) \preg_replace($media_query_pattern, '', $str);
    }
1187
1188
    /**
     * Tells whether the "ctype" extension is loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }
1200
1201
    /**
     * Converts an int value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#...;") which is then
     * decoded by UTF8::html_entity_decode().
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int
     *
     * @psalm-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }
1220
1221
    /**
     * Decodes a MIME header field via iconv_mime_decode() in "continue on error" mode.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // the two common charsets are used as given, everything else is normalized
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }
1242
1243
    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * Each letter is mapped onto the matching "regional indicator symbol"
     * (U+1F1E6 for "A" up to U+1F1FF for "Z"); the two symbols together form
     * the flag sequence. The lookup is case-insensitive.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g. DE</p>
     *
     * @return string
     *                <p>Emoji or empty string on error, i.e. whenever the input
     *                is not exactly two ASCII letters.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only two ASCII letters can be mapped. Checking the character count
        // alone would let digits or multibyte characters through, and the byte
        // offsets used below would then yield non-flag code points.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $flagOffset = 0x1F1E6; // REGIONAL INDICATOR SYMBOL LETTER A
        $asciiOffset = 0x61;   // "a"

        $emoji = '';
        foreach ([$country_code_iso_3166_1[0], $country_code_iso_3166_1[1]] as $letter) {
            // "| 0x20" folds an ASCII upper-case letter onto its lower-case
            // form, so no locale-aware case conversion is involved.
            $letterIndex = (\ord($letter) | 0x20) - $asciiOffset;

            $emoji .= (self::chr($flagOffset + $letterIndex) ?? '');
        }

        return $emoji;
    }
1271
1272
    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // Both modes replace towards the same emoji values; only the list of
        // placeholders that is searched for differs.
        $placeholders = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $placeholders,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }
1312
1313
    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // The emoji values are always the search list; the flag only selects
        // which list of placeholders they are replaced with.
        $placeholders = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $placeholders,
            $str
        );
    }
1353
1354
    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-&#20013;&#25991;&#31354;&#30333;-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true (or when $from_encoding is empty), the
     *                                              current encoding is detected via UTF8::str_detect_encoding()
     *                                              and a detected encoding overrides $from_encoding.<br>
     *                                              If nothing can be detected in this mode, the result of
     *                                              UTF8::to_utf8() is returned.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException <p>If $to_encoding is "JSON" and the string can not be json-encoded.</p>
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // nothing to do without input or without a target
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // normalize the encoding names, the two common ones are taken as given
        if (!\in_array($to_encoding, ['UTF-8', 'CP850'], true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, ['UTF-8', 'CP850'], true)) {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // --- pseudo encodings: a matching target returns directly, a matching
        // --- source is decoded and the source encoding is reset afterwards

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // NOTE(review): strict base64_decode() yields false for invalid
            // input and that value is used as $str below - confirm the
            // downstream helpers cope with it.
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // --- find out the source encoding

        $detected_encoding = false;
        if ($auto_detect_the_from_encoding || !$from_encoding) {
            $detected_encoding = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $detected_encoding, $str, "\n\n");

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // --- shortcuts that are handled by the library's own converters

        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // --- generic conversion: mbstring first, iconv as fallback

        if (
            self::$SUPPORT['mbstring'] === false
            &&
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $str_encoded = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // a falsy result falls through to the iconv attempt below
            if ($str_encoded) {
                \assert(\is_string($str_encoded));

                return $str_encoded;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $converted = @\iconv($from_encoding, $to_encoding, $str);

        // if iconv fails as well, the string is handed back as it is at this point
        return $converted !== false ? $converted : $str;
    }
1532
1533
    /**
     * Encodes a string as a MIME header field via iconv_mime_encode(), using an empty field name.
     *
     * @param string $str
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
     * @param int    $indent            [optional] <p>Set the max length indent
     *                                  (passed on as the "line-length" preference).</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // the two common charsets are used as given, everything else is normalized
        if (!\in_array($from_charset, ['UTF-8', 'CP850'], true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, ['UTF-8', 'CP850'], true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }
1576
1577
    /**
1578
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
1579
     *
1580
     * @param string   $str                       <p>The input string.</p>
1581
     * @param string   $search                    <p>The searched string.</p>
1582
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
1583
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
1584
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
1585
     *
1586
     * @psalm-pure
1587
     *
1588
     * @return string
1589
     */
1590 1
    public static function extract_text(
1591
        string $str,
1592
        string $search = '',
1593
        int $length = null,
1594
        string $replacer_for_skipped_text = '…',
1595
        string $encoding = 'UTF-8'
1596
    ): string {
1597 1
        if ($str === '') {
1598 1
            return '';
1599
        }
1600
1601 1
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
1602
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
1603
        }
1604
1605 1
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";
1606
1607 1
        if ($length === null) {
1608 1
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
1609
        }
1610
1611 1
        if ($search === '') {
1612 1
            if ($encoding === 'UTF-8') {
1613 1
                if ($length > 0) {
1614 1
                    $string_length = (int) \mb_strlen($str);
1615 1
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
1616
                } else {
1617 1
                    $end = 0;
1618
                }
1619
1620 1
                $pos = (int) \min(
1621 1
                    \mb_strpos($str, ' ', $end),
1622 1
                    \mb_strpos($str, '.', $end)
1623
                );
1624
            } else {
1625
                if ($length > 0) {
1626
                    $string_length = (int) self::strlen($str, $encoding);
1627
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
1628
                } else {
1629
                    $end = 0;
1630
                }
1631
1632
                $pos = (int) \min(
1633
                    self::strpos($str, ' ', $end, $encoding),
1634
                    self::strpos($str, '.', $end, $encoding)
1635
                );
1636
            }
1637
1638 1
            if ($pos) {
1639 1
                if ($encoding === 'UTF-8') {
1640 1
                    $str_sub = \mb_substr($str, 0, $pos);
1641
                } else {
1642
                    $str_sub = self::substr($str, 0, $pos, $encoding);
1643
                }
1644
1645 1
                if ($str_sub === false) {
1646
                    return '';
1647
                }
1648
1649 1
                return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1650
            }
1651
1652
            return $str;
1653
        }
1654
1655 1
        if ($encoding === 'UTF-8') {
1656 1
            $word_position = (int) \mb_stripos($str, $search);
1657 1
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
1658
        } else {
1659
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
1660
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
1661
        }
1662
1663 1
        $pos_start = 0;
1664 1
        if ($half_side > 0) {
1665 1
            if ($encoding === 'UTF-8') {
1666 1
                $half_text = \mb_substr($str, 0, $half_side);
1667
            } else {
1668
                $half_text = self::substr($str, 0, $half_side, $encoding);
1669
            }
1670 1
            if ($half_text !== false) {
1671 1
                if ($encoding === 'UTF-8') {
1672 1
                    $pos_start = (int) \max(
1673 1
                        \mb_strrpos($half_text, ' '),
1674 1
                        \mb_strrpos($half_text, '.')
1675
                    );
1676
                } else {
1677
                    $pos_start = (int) \max(
1678
                        self::strrpos($half_text, ' ', 0, $encoding),
1679
                        self::strrpos($half_text, '.', 0, $encoding)
1680
                    );
1681
                }
1682
            }
1683
        }
1684
1685 1
        if ($word_position && $half_side > 0) {
1686 1
            $offset = $pos_start + $length - 1;
1687 1
            $real_length = (int) self::strlen($str, $encoding);
1688
1689 1
            if ($offset > $real_length) {
1690
                $offset = $real_length;
1691
            }
1692
1693 1
            if ($encoding === 'UTF-8') {
1694 1
                $pos_end = (int) \min(
1695 1
                    \mb_strpos($str, ' ', $offset),
1696 1
                    \mb_strpos($str, '.', $offset)
1697 1
                ) - $pos_start;
1698
            } else {
1699
                $pos_end = (int) \min(
1700
                    self::strpos($str, ' ', $offset, $encoding),
1701
                    self::strpos($str, '.', $offset, $encoding)
1702
                ) - $pos_start;
1703
            }
1704
1705 1
            if (!$pos_end || $pos_end <= 0) {
1706 1
                if ($encoding === 'UTF-8') {
1707 1
                    $str_sub = \mb_substr($str, $pos_start, (int) \mb_strlen($str));
1708
                } else {
1709
                    $str_sub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
1710
                }
1711 1
                if ($str_sub !== false) {
1712 1
                    $extract = $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
1713
                } else {
1714 1
                    $extract = '';
1715
                }
1716
            } else {
1717 1
                if ($encoding === 'UTF-8') {
1718 1
                    $str_sub = \mb_substr($str, $pos_start, $pos_end);
1719
                } else {
1720
                    $str_sub = self::substr($str, $pos_start, $pos_end, $encoding);
1721
                }
1722 1
                if ($str_sub !== false) {
1723 1
                    $extract = $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1724
                } else {
1725 1
                    $extract = '';
1726
                }
1727
            }
1728
        } else {
1729 1
            $offset = $length - 1;
1730 1
            $true_length = (int) self::strlen($str, $encoding);
1731
1732 1
            if ($offset > $true_length) {
1733
                $offset = $true_length;
1734
            }
1735
1736 1
            if ($encoding === 'UTF-8') {
1737 1
                $pos_end = (int) \min(
1738 1
                    \mb_strpos($str, ' ', $offset),
1739 1
                    \mb_strpos($str, '.', $offset)
1740
                );
1741
            } else {
1742
                $pos_end = (int) \min(
1743
                    self::strpos($str, ' ', $offset, $encoding),
1744
                    self::strpos($str, '.', $offset, $encoding)
1745
                );
1746
            }
1747
1748 1
            if ($pos_end) {
1749 1
                if ($encoding === 'UTF-8') {
1750 1
                    $str_sub = \mb_substr($str, 0, $pos_end);
1751
                } else {
1752
                    $str_sub = self::substr($str, 0, $pos_end, $encoding);
1753
                }
1754 1
                if ($str_sub !== false) {
1755 1
                    $extract = \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
1756
                } else {
1757 1
                    $extract = '';
1758
                }
1759
            } else {
1760 1
                $extract = $str;
1761
            }
1762
        }
1763
1764 1
        return $extract;
1765
    }
1766
1767
    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. The name is run through
     *                                        FILTER_SANITIZE_STRING before the file is opened.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Forwarded unchanged to the native file_get_contents()
     *                                        (include path search).
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If you don't need to use a
     *                                        custom context, you can skip this parameter by passing null.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts, null is treated as 0.
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached. A negative value is treated as a failure
     *                                        and <b>false</b> is returned.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout. It is only applied (as "http"
     *                                        stream option) if it is non-zero and no $context was given.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // init
        $filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($filename === false) {
            return false;
        }

        // A negative length can never be read: the native function answers with a
        // warning + "false" on PHP 7 but throws a ValueError on PHP 8. Report the
        // failure the documented way ("false") on every PHP version.
        if ($max_length !== null && $max_length < 0) {
            return false;
        }

        // only build a default context (carrying the timeout) if the caller did not provide one
        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // the length argument must be omitted (not passed as null) to read until the end of the file
        if (\is_int($max_length)) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $max_length);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8) {
            // convert text data only: either not binary at all, or binary-looking UTF-16 / UTF-32
            if (
                !self::is_binary($data, true)
                ||
                self::is_utf16($data, false) !== false
                ||
                self::is_utf32($data, false) !== false
            ) {
                $data = self::encode('UTF-8', $data, false, $from_encoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }
1869
1870
    /**
     * Tells whether the content of a file begins with a BOM (Byte Order Mark).
     *
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * The complete file is loaded via the native file_get_contents() and the
     * detection itself is delegated to UTF8::string_has_bom().
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $raw_content = \file_get_contents($file_path);

        if ($raw_content !== false) {
            return self::string_has_bom($raw_content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }
1893
1894
    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * - Arrays and objects are walked element by element (by reference) and
     *   every element is filtered recursively.
     * - Strings containing "\r" get their line endings normalized first; non-ASCII
     *   strings are then brought into the requested normalization form. If the
     *   normalizer yields nothing usable, the string is re-encoded as UTF-8 instead.
     * - Every other type is returned untouched.
     *
     * @param array|object|string $var                <p>The value to filter.</p>
     * @param int                 $normalization_form <p>One of the \Normalizer form constants.</p>
     * @param string              $leading_combining  <p>Prefix that is put in front of a string which
     *                                                starts with a combining mark (\p{Mn}); an empty
     *                                                string disables the prefixing.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @psalm-param TFilter $var
     * @psalm-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            foreach ($var as &$item) {
                $item = self::filter($item, $normalization_form, $leading_combining);
            }
            // drop the dangling reference left behind by the loop
            unset($item);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            if (!ASCII::is_ascii($var)) {
                // non-empty placeholder: marks "already normalized" for the check further down
                $normalized = '-';

                if (!\Normalizer::isNormalized($var, $normalization_form)) {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // an empty / failed normalization falls back to a plain UTF-8 re-encoding
                    $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $needs_leading_char = $var[0] >= "\x80"
                    && isset($normalized[0], $leading_combining[0])
                    && \preg_match('/^\\p{Mn}/u', $var);

                if ($needs_leading_char) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @psalm-var TFilter $var */
        $var = $var;

        return $var;
    }
1969
1970
    /**
     * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name and optionally filters it;
     * the result is passed through UTF8::filter() before it is returned.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      <b>NULL</b> means: do not pass any options to the native function.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $with_options = $options !== null && \func_num_args() >= 4;

        if ($with_options) {
            return self::filter(\filter_input($type, $variable_name, $filter, $options));
        }

        return self::filter(\filter_input($type, $variable_name, $filter));
    }
2025
2026
    /**
     * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables and optionally filters them; the result is
     * passed through UTF8::filter() before it is returned.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => FILTER_SANITIZE_STRING)); // array('foo' => 'bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int            $type       <p>
     *                                   One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                   <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                   <b>INPUT_ENV</b>.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a filter type,
     *                                   or an array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are "filter" which specifies
     *                                   the filter type, "flags" which specifies any flags that apply to the
     *                                   filter, and "options" which specifies any options that apply to the
     *                                   filter.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then
     *                                   all values in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   <b>NULL</b> means: call the native function with $type only.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $with_definition = $definition !== null && \func_num_args() >= 2;

        $values = $with_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($values);
    }
2087
2088
    /**
     * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter; the result is passed
     * through UTF8::filter() before it is returned.
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it. <b>NULL</b> (the default) means:
     *                                        do not pass any options to the native function.
     *                                        </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                            'options' => array(
     *                                                'default' => 3, // value to return if the filter fails
     *                                                // other options here
     *                                                'min_range' => 0
     *                                            ),
     *                                            'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                            array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                            // Expected format: Surname, GivenNames
     *                                            if (strpos($value, ", ") === false) return false;
     *                                            list($surname, $givennames) = explode(", ", $value, 2);
     *                                            $empty = (empty($surname) || empty($givennames));
     *                                            $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                            if ($empty || $notstrings) {
     *                                                return false;
     *                                            } else {
     *                                                return $value;
     *                                            }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * An explicit null is handled like an omitted argument (same as in
         * UTF8::filter_input()): the native filter_var() only accepts array|int
         * for its options and rejects null under strict types on PHP 8.
         *
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($options === null || \func_num_args() < 3) {
            $variable = \filter_var($variable, $filter);
        } else {
            $variable = \filter_var($variable, $filter, $options);
        }

        return self::filter($variable);
    }
2167
2168
    /**
     * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables and optionally filters them; the result is
     * passed through UTF8::filter() before it is returned.
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'user@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'user@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are "filter"
     *                                   which specifies the filter type,
     *                                   "flags" which specifies any flags that apply to the
     *                                   filter, and "options" which specifies any options that
     *                                   apply to the filter. See the example above for a better understanding.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values
     *                                   in the input array are filtered by this filter.
     *                                   </p>
     *                                   <p>
     *                                   <b>NULL</b> (the default) means: call the native function with $data only,
     *                                   in that case $add_empty is not forwarded.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * An explicit null is handled like an omitted argument (same as in
         * UTF8::filter_input_array()): the native filter_var_array() only accepts
         * array|int as definition and rejects null under strict types on PHP 8.
         *
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        if ($definition === null || \func_num_args() < 2) {
            $a = \filter_var_array($data);
        } else {
            $a = \filter_var_array($data, $definition, $add_empty);
        }

        return self::filter($a);
    }
2238
2239
    /**
     * Reports whether the "finfo" class exists in the running PHP installation.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
     */
    public static function finfo_loaded(): bool
    {
        $finfo_available = \class_exists('finfo');

        return $finfo_available;
    }
2251
2252
    /**
     * Returns the leading $n characters of the string.
     *
     * An empty input or a non-positive $n yields an empty string. For "UTF-8"
     * the native mb_substr() is used directly, every other encoding is
     * handled by UTF8::substr().
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to cut out
        if ($n <= 0 || $str === '') {
            return '';
        }

        $prefix = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $prefix;
    }
2278
2279
    /**
     * Check if the number of Unicode characters isn't greater than the specified integer.
     *
     * The length is taken from UTF8::strlen() and compared against $box_size.
     *
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 4); // false</code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }
2296
2297
    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('Düsseldorf'); // 'Düsseldorf'</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The fix is a plain str_replace() driven by the "utf8_fix" data table; the
     * search / replace lists are built once and kept in static variables.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * search list (keys of the fix table), filled on first use
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $BROKEN_SEQUENCES = null;

        /**
         * replacement list (the fix table itself), filled on first use
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FIXED_SEQUENCES = null;

        if ($BROKEN_SEQUENCES === null) {
            // load the shared fix table only once per request
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $FIXED_SEQUENCES = self::$BROKEN_UTF8_FIX;
            $BROKEN_SEQUENCES = \array_keys($FIXED_SEQUENCES);
        }

        \assert(\is_array($FIXED_SEQUENCES));

        return \str_replace($BROKEN_SEQUENCES, $FIXED_SEQUENCES, $str);
    }
2347
2348
    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * EXAMPLE: <code>UTF8::fix_utf8('Fédération'); // 'Fédération'</code>
     *
     * Arrays are processed element by element (recursively, keys are kept).
     * A string is decoded / re-encoded via UTF8::utf8_decode() and
     * UTF8::to_utf8() again and again until a pass no longer changes it.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::fix_utf8($value);
            }

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';

        // repeat until one more round-trip leaves the string unchanged
        while ($previous !== $current) {
            $previous = $current;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = self::utf8_decode($current, true);
            $current = self::to_utf8($decoded);
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }
2394
2395
    /**
     * Get the text direction ("RTL" or "LTR") of a specific character.
     *
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * @param string $char <p>The character to inspect.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        // Preferred path: ask the intl extension and map its numeric result.
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $tmp_return = \IntlChar::charDirection($char);

            // from "IntlChar"-Class
            $char_direction = [
                'RTL' => [1, 13, 14, 15, 21],
                'LTR' => [0, 11, 12, 20],
            ];

            if (\in_array($tmp_return, $char_direction['LTR'], true)) {
                return 'LTR';
            }

            if (\in_array($tmp_return, $char_direction['RTL'], true)) {
                return 'RTL';
            }

            // Any other result (neutral/weak classes, or no result) falls through
            // to the code-point table below.
        }

        // The class is final, so "self::" and "static::" resolve identically here.
        $c = self::chr_to_decimal($char);

        // Everything outside this window is treated as LTR.
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        if ($c <= 0x85e) {
            if ($c === 0x5be ||
                $c === 0x5c0 ||
                $c === 0x5c3 ||
                $c === 0x5c6 ||
                ($c >= 0x5d0 && $c <= 0x5ea) ||
                ($c >= 0x5f0 && $c <= 0x5f4) ||
                $c === 0x608 ||
                $c === 0x60b ||
                $c === 0x60d ||
                $c === 0x61b ||
                ($c >= 0x61e && $c <= 0x64a) ||
                ($c >= 0x66d && $c <= 0x66f) ||
                ($c >= 0x671 && $c <= 0x6d5) ||
                ($c >= 0x6e5 && $c <= 0x6e6) ||
                ($c >= 0x6ee && $c <= 0x6ef) ||
                ($c >= 0x6fa && $c <= 0x70d) ||
                $c === 0x710 ||
                ($c >= 0x712 && $c <= 0x72f) ||
                ($c >= 0x74d && $c <= 0x7a5) ||
                $c === 0x7b1 ||
                ($c >= 0x7c0 && $c <= 0x7ea) ||
                ($c >= 0x7f4 && $c <= 0x7f5) ||
                $c === 0x7fa ||
                ($c >= 0x800 && $c <= 0x815) ||
                $c === 0x81a ||
                $c === 0x824 ||
                $c === 0x828 ||
                ($c >= 0x830 && $c <= 0x83e) ||
                ($c >= 0x840 && $c <= 0x858) ||
                $c === 0x85e
            ) {
                return 'RTL';
            }
        } elseif ($c === 0x200f) {
            return 'RTL';
        } elseif ($c >= 0xfb1d) {
            if ($c === 0xfb1d ||
                ($c >= 0xfb1f && $c <= 0xfb28) ||
                ($c >= 0xfb2a && $c <= 0xfb36) ||
                ($c >= 0xfb38 && $c <= 0xfb3c) ||
                $c === 0xfb3e ||
                ($c >= 0xfb40 && $c <= 0xfb41) ||
                ($c >= 0xfb43 && $c <= 0xfb44) ||
                ($c >= 0xfb46 && $c <= 0xfbc1) ||
                ($c >= 0xfbd3 && $c <= 0xfd3d) ||
                ($c >= 0xfd50 && $c <= 0xfd8f) ||
                ($c >= 0xfd92 && $c <= 0xfdc7) ||
                ($c >= 0xfdf0 && $c <= 0xfdfc) ||
                ($c >= 0xfe70 && $c <= 0xfe74) ||
                ($c >= 0xfe76 && $c <= 0xfefc) ||
                ($c >= 0x10800 && $c <= 0x10805) ||
                $c === 0x10808 ||
                ($c >= 0x1080a && $c <= 0x10835) ||
                ($c >= 0x10837 && $c <= 0x10838) ||
                $c === 0x1083c ||
                ($c >= 0x1083f && $c <= 0x10855) ||
                ($c >= 0x10857 && $c <= 0x1085f) ||
                ($c >= 0x10900 && $c <= 0x1091b) ||
                ($c >= 0x10920 && $c <= 0x10939) ||
                $c === 0x1093f ||
                $c === 0x10a00 ||
                ($c >= 0x10a10 && $c <= 0x10a13) ||
                ($c >= 0x10a15 && $c <= 0x10a17) ||
                ($c >= 0x10a19 && $c <= 0x10a33) ||
                ($c >= 0x10a40 && $c <= 0x10a47) ||
                ($c >= 0x10a50 && $c <= 0x10a58) ||
                ($c >= 0x10a60 && $c <= 0x10a7f) ||
                ($c >= 0x10b00 && $c <= 0x10b35) ||
                ($c >= 0x10b40 && $c <= 0x10b55) ||
                ($c >= 0x10b58 && $c <= 0x10b72) ||
                ($c >= 0x10b78 && $c <= 0x10b7f)
            ) {
                return 'RTL';
            }
        }

        return 'LTR';
    }
2513
2514
    /**
     * Check for php-support.
     *
     * Without a key the current support-"array" is returned as it is. With a key,
     * the transliterator list is loaded on first use and copied into the
     * support-"array" before the lookup.
     *
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key === null) {
            return self::$SUPPORT;
        }

        // Load the transliterator list only once; later calls reuse the cached value.
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        // compatibility fix for old versions
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        // Unknown keys yield null instead of an "undefined index" notice.
        return self::$SUPPORT[$key] ?? null;
    }
2540
2541
    /**
     * Detect the file type from the first two bytes of the given binary string.
     *
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str      <p>The (binary) content to inspect.</p>
     * @param array  $fallback <p>with these keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with these keys: 'ext', 'mime', 'type'</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        // Two bytes are needed for the signature check; shorter input cannot be classified.
        /** @var false|string $str_info - needed for PhpStan (stubs error) */
        $str_info = \substr($str, 0, 2);
        if ($str_info === false || \strlen($str_info) !== 2) {
            return $fallback;
        }

        // Unpack both bytes as unsigned integers under the keys "chars1" and "chars2".
        /** @var array|false $str_info - needed for PhpStan (stubs error) */
        $str_info = \unpack('C2chars', $str_info);
        if ($str_info === false) {
            return $fallback;
        }

        // The two decimal byte values are concatenated, e.g. 0xFF 0xD8 => "255216".
        /** @noinspection OffsetOperationsInspection */
        $type_code = (int) ($str_info['chars1'] . $str_info['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        switch ($type_code) {
            // WARNING: do not add too simple comparisons, because of false-positive results:
            //
            // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
            // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
            //
            case 255216:
                $ext = 'jpg';
                $mime = 'image/jpeg';
                $type = 'binary';

                break;
            case 13780:
                $ext = 'png';
                $mime = 'image/png';
                $type = 'binary';

                break;
            default:
                return $fallback;
        }

        return [
            'ext'  => $ext,
            'mime' => $mime,
            'type' => $type,
        ];
    }
2622
2623
    /**
     * Build a random string of the given length from a pool of allowed characters.
     *
     * An empty string is returned when the pool is empty or the length is not positive.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // The exact 'UTF-8' spelling takes the direct "mb_" path; every other value
        // is normalized first and handled by the encoding-aware helpers.
        $is_utf8 = $encoding === 'UTF-8';
        if ($is_utf8) {
            $max_length = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $max_length = (int) self::strlen($possible_chars, $encoding);
        }

        if ($max_length === 0) {
            return '';
        }

        // init
        $i = 0;
        $str = '';

        //
        // add random chars
        //

        while ($i < $length) {
            try {
                $rand_int = \random_int(0, $max_length - 1);
            } catch (\Exception $e) {
                // Best-effort fallback if no secure random source is available.
                /** @noinspection RandomApiMigrationInspection */
                $rand_int = \mt_rand(0, $max_length - 1);
            }

            if ($is_utf8) {
                $char = \mb_substr($possible_chars, $rand_int, 1);
            } else {
                $char = self::substr($possible_chars, $rand_int, 1, $encoding);
            }

            // Only count a position once a character was actually extracted.
            if ($char !== false) {
                $str .= $char;
                ++$i;
            }
        }

        return $str;
    }
2687
2688
    /**
     * Build a unique string from a random number, the session id, the request
     * addresses and optional extra entropy.
     *
     * INFO: the result is an identifier based on uniqid() and md5(), do not use it
     * as a password or as a cryptographic secret.
     *
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash? Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $rand_int = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            // Best-effort fallback if no secure random source is available.
            /** @noinspection RandomApiMigrationInspection */
            $rand_int = \mt_rand(0, \mt_getrandmax());
        }

        // Missing server variables simply contribute an empty string.
        $unique_helper = $rand_int .
                         \session_id() .
                         ($_SERVER['REMOTE_ADDR'] ?? '') .
                         ($_SERVER['SERVER_ADDR'] ?? '') .
                         $extra_entropy;

        // The helper is used as the uniqid() prefix, with "more entropy" enabled.
        $unique_string = \uniqid($unique_helper, true);

        if ($use_md5) {
            $unique_string = \md5($unique_string . $unique_helper);
        }

        return $unique_string;
    }
2717
2718
    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        // Pure delegation, kept only for backward compatibility.
        return self::string_has_bom($str);
    }
2734
2735
    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        // The leading ".*" lets the match succeed when the character is not at the start.
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \mb_ereg_match('.*[[:lower:]]', $str);
        }

        // Fallback without mbstring: same pattern via the internal matcher.
        return self::str_matches_pattern($str, '.*[[:lower:]]');
    }
2754
2755
    /**
     * Returns true if the string contains whitespace, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        // The leading ".*" lets the match succeed when the character is not at the start.
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \mb_ereg_match('.*[[:space:]]', $str);
        }

        // Fallback without mbstring: same pattern via the internal matcher.
        return self::str_matches_pattern($str, '.*[[:space:]]');
    }
2774
2775
    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        // The leading ".*" lets the match succeed when the character is not at the start.
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return \mb_ereg_match('.*[[:upper:]]', $str);
        }

        // Fallback without mbstring: same pattern via the internal matcher.
        return self::str_matches_pattern($str, '.*[[:upper:]]');
    }
2794
2795
    /**
     * Converts a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // hexdec() ignores every non-hexadecimal character (e.g. the "U+" prefix) but
        // reports that as a deprecation on PHP 7.4+. Removing those characters first
        // yields the same number without relying on the "@" operator.
        $hex_digits = (string) \preg_replace('/[^0-9a-fA-F]+/', '', $hexdec);

        return self::decimal_to_chr((int) \hexdec($hex_digits));
    }
2813
2814
    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * Accepted forms are "\uXXXX", "U+XXXX" and plain "XXXX", each with 4 to 6
     * hexadecimal digits.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits are accepted. The previous [a-zA-Z0-9] class let input
        // such as "zzzz" or "U+12g4" through, and intval() then returned 0 or a
        // truncated value instead of reporting the failure.
        if (\preg_match('/^(?:\\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }
2843
2844
    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * @param string   $str      <p>The input string.</p>
     * @param int|null $flags    [optional] <p>Passed through unchanged.</p>
     * @param string   $encoding [optional] <p>Passed through unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Pure delegation, kept only for backward compatibility.
        return self::html_entity_decode($str, $flags, $encoding);
    }
2865
2866
    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // Starting at 0x80 leaves the ASCII range untouched.
            $start_code = 0x00;
            if ($keep_ascii_chars) {
                $start_code = 0x80;
            }

            // A conversion map consists of groups of four integers:
            // [start, end, offset, mask]. The former fifth element was never part of
            // a group and is rejected by newer PHP versions, so it is left out.
            $conversion_map = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity(
                    $str,
                    $conversion_map
                );
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity(
                $str,
                $conversion_map,
                $encoding
            );
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }
2937
2938
    /**
2939
     * UTF-8 version of html_entity_decode()
2940
     *
2941
     * The reason we are not using html_entity_decode() by itself is because
2942
     * while it is not technically correct to leave out the semicolon
2943
     * at the end of an entity most browsers will still interpret the entity
2944
     * correctly. html_entity_decode() does not convert entities without
2945
     * semicolons, so we are left with our own little solution here. Bummer.
2946
     *
2947
     * Convert all HTML entities to their applicable characters.
2948
     *
2949
     * INFO: opposite to UTF8::html_encode()
2950
     *
2951
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
2952
     *
2953
     * @see http://php.net/manual/en/function.html-entity-decode.php
2954
     *
2955
     * @param string   $str      <p>
2956
     *                           The input string.
2957
     *                           </p>
2958
     * @param int|null $flags    [optional] <p>
2959
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
2960
     *                           and which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2961
     *                           <table>
2962
     *                           Available <i>flags</i> constants
2963
     *                           <tr valign="top">
2964
     *                           <td>Constant Name</td>
2965
     *                           <td>Description</td>
2966
     *                           </tr>
2967
     *                           <tr valign="top">
2968
     *                           <td><b>ENT_COMPAT</b></td>
2969
     *                           <td>Will convert double-quotes and leave single-quotes alone.</td>
2970
     *                           </tr>
2971
     *                           <tr valign="top">
2972
     *                           <td><b>ENT_QUOTES</b></td>
2973
     *                           <td>Will convert both double and single quotes.</td>
2974
     *                           </tr>
2975
     *                           <tr valign="top">
2976
     *                           <td><b>ENT_NOQUOTES</b></td>
2977
     *                           <td>Will leave both double and single quotes unconverted.</td>
2978
     *                           </tr>
2979
     *                           <tr valign="top">
2980
     *                           <td><b>ENT_HTML401</b></td>
2981
     *                           <td>
2982
     *                           Handle code as HTML 4.01.
2983
     *                           </td>
2984
     *                           </tr>
2985
     *                           <tr valign="top">
2986
     *                           <td><b>ENT_XML1</b></td>
2987
     *                           <td>
2988
     *                           Handle code as XML 1.
2989
     *                           </td>
2990
     *                           </tr>
2991
     *                           <tr valign="top">
2992
     *                           <td><b>ENT_XHTML</b></td>
2993
     *                           <td>
2994
     *                           Handle code as XHTML.
2995
     *                           </td>
2996
     *                           </tr>
2997
     *                           <tr valign="top">
2998
     *                           <td><b>ENT_HTML5</b></td>
2999
     *                           <td>
3000
     *                           Handle code as HTML 5.
3001
     *                           </td>
3002
     *                           </tr>
3003
     *                           </table>
3004
     *                           </p>
3005
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
3006
     *
3007
     * @psalm-pure
3008
     *
3009
     * @return string the decoded string
3010
     */
3011 51
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Fast path: strings shorter than four bytes or without "&" are returned untouched.
        if (
            !isset($str[3]) // examples: &; || &x;
            ||
            \strpos($str, '&') === false // no "&"
        ) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Decode repeatedly until a pass changes nothing, so that nested encodings
        // such as "&amp;lt;" are resolved as well.
        do {
            $str_compare = $str;

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities
                    // (adds the missing ";" so that html_entity_decode() accepts them)
                    $str_with_semicolons = \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                    // preg_replace() returns null on a PCRE error; keep the current
                    // string in that case instead of replacing it with an empty one.
                    if ($str_with_semicolons !== null) {
                        $str = $str_with_semicolons;
                    }
                }

                $str = \html_entity_decode(
                    $str,
                    $flags,
                    $encoding
                );
            }
        } while ($str_compare !== $str);

        return $str;
    }
3071
3072
    /**
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
     *
     * The call always uses the flags ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        return self::htmlspecialchars(
            $str,
            \ENT_QUOTES | \ENT_SUBSTITUTE,
            $encoding
        );
    }
3090
3091
    /**
     * Remove empty html-tag.
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * INFO: the opening and the closing tag name are not compared with each other.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without empty tags; the unchanged input if the
     *                regular expression fails (e.g. on invalid UTF-8).</p>
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // Matches "<tag ...>", optional whitespace, then any closing tag "</...>".
        $stripped = \preg_replace(
            '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u',
            '',
            $str
        );

        // With the "u" modifier preg_replace() returns null for invalid UTF-8 input;
        // hand back the original string rather than silently dropping all content.
        return $stripped ?? $str;
    }
3110
3111
    /**
3112
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
3113
     *
3114
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
3115
     *
3116
     * @see http://php.net/manual/en/function.htmlentities.php
3117
     *
3118
     * @param string $str           <p>
3119
     *                              The input string.
3120
     *                              </p>
3121
     * @param int    $flags         [optional] <p>
3122
     *                              A bitmask of one or more of the following flags, which specify how to handle
3123
     *                              quotes, invalid code unit sequences and the used document type. The default is
3124
     *                              ENT_COMPAT | ENT_HTML401.
3125
     *                              <table>
3126
     *                              Available <i>flags</i> constants
3127
     *                              <tr valign="top">
3128
     *                              <td>Constant Name</td>
3129
     *                              <td>Description</td>
3130
     *                              </tr>
3131
     *                              <tr valign="top">
3132
     *                              <td><b>ENT_COMPAT</b></td>
3133
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3134
     *                              </tr>
3135
     *                              <tr valign="top">
3136
     *                              <td><b>ENT_QUOTES</b></td>
3137
     *                              <td>Will convert both double and single quotes.</td>
3138
     *                              </tr>
3139
     *                              <tr valign="top">
3140
     *                              <td><b>ENT_NOQUOTES</b></td>
3141
     *                              <td>Will leave both double and single quotes unconverted.</td>
3142
     *                              </tr>
3143
     *                              <tr valign="top">
3144
     *                              <td><b>ENT_IGNORE</b></td>
3145
     *                              <td>
3146
     *                              Silently discard invalid code unit sequences instead of returning
3147
     *                              an empty string. Using this flag is discouraged as it
3148
     *                              may have security implications.
3149
     *                              </td>
3150
     *                              </tr>
3151
     *                              <tr valign="top">
3152
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3153
     *                              <td>
3154
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3155
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3156
     *                              string.
3157
     *                              </td>
3158
     *                              </tr>
3159
     *                              <tr valign="top">
3160
     *                              <td><b>ENT_DISALLOWED</b></td>
3161
     *                              <td>
3162
     *                              Replace invalid code points for the given document type with a
3163
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3164
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3165
     *                              instance, to ensure the well-formedness of XML documents with
3166
     *                              embedded external content.
3167
     *                              </td>
3168
     *                              </tr>
3169
     *                              <tr valign="top">
3170
     *                              <td><b>ENT_HTML401</b></td>
3171
     *                              <td>
3172
     *                              Handle code as HTML 4.01.
3173
     *                              </td>
3174
     *                              </tr>
3175
     *                              <tr valign="top">
3176
     *                              <td><b>ENT_XML1</b></td>
3177
     *                              <td>
3178
     *                              Handle code as XML 1.
3179
     *                              </td>
3180
     *                              </tr>
3181
     *                              <tr valign="top">
3182
     *                              <td><b>ENT_XHTML</b></td>
3183
     *                              <td>
3184
     *                              Handle code as XHTML.
3185
     *                              </td>
3186
     *                              </tr>
3187
     *                              <tr valign="top">
3188
     *                              <td><b>ENT_HTML5</b></td>
3189
     *                              <td>
3190
     *                              Handle code as HTML 5.
3191
     *                              </td>
3192
     *                              </tr>
3193
     *                              </table>
3194
     *                              </p>
3195
     * @param string $encoding      [optional] <p>
3196
     *                              Like <b>htmlspecialchars</b>,
3197
     *                              <b>htmlentities</b> takes an optional third argument
3198
     *                              <i>encoding</i> which defines encoding used in
3199
     *                              conversion.
3200
     *                              Although this argument is technically optional, you are highly
3201
     *                              encouraged to specify the correct value for your code.
3202
     *                              </p>
3203
     * @param bool   $double_encode [optional] <p>
3204
     *                              When <i>double_encode</i> is turned off PHP will not
3205
     *                              encode existing html entities. The default is to convert everything.
3206
     *                              </p>
3207
     *
3208
     * @psalm-pure
3209
     *
3210
     * @return string
3211
     *                <p>
3212
     *                The encoded string.
3213
     *                <br><br>
3214
     *                If the input <i>string</i> contains an invalid code unit
3215
     *                sequence within the given <i>encoding</i> an empty string
3216
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3217
     *                <b>ENT_SUBSTITUTE</b> flags are set.
3218
     *                </p>
3219
     */
3220 9
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as-is; any other name is run
        // through normalize_encoding() first (second argument presumably the fallback).
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $encoded = \htmlentities($str, $flags, $encoding, $double_encode);

        /*
         * The native htmlentities() does not convert a backslash into an html entity,
         * so every backslash is replaced by its numeric entity here.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $encoded = \str_replace('\\', '&#92;', $encoded);

        return self::html_encode($encoded, true, $encoding);
    }
3249
3250
    /**
3251
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
3252
     *
3253
     * INFO: Take a look at "UTF8::htmlentities()"
3254
     *
3255
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
3256
     *
3257
     * @see http://php.net/manual/en/function.htmlspecialchars.php
3258
     *
3259
     * @param string $str           <p>
3260
     *                              The string being converted.
3261
     *                              </p>
3262
     * @param int    $flags         [optional] <p>
3263
     *                              A bitmask of one or more of the following flags, which specify how to handle
3264
     *                              quotes, invalid code unit sequences and the used document type. The default is
3265
     *                              ENT_COMPAT | ENT_HTML401.
3266
     *                              <table>
3267
     *                              Available <i>flags</i> constants
3268
     *                              <tr valign="top">
3269
     *                              <td>Constant Name</td>
3270
     *                              <td>Description</td>
3271
     *                              </tr>
3272
     *                              <tr valign="top">
3273
     *                              <td><b>ENT_COMPAT</b></td>
3274
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
3275
     *                              </tr>
3276
     *                              <tr valign="top">
3277
     *                              <td><b>ENT_QUOTES</b></td>
3278
     *                              <td>Will convert both double and single quotes.</td>
3279
     *                              </tr>
3280
     *                              <tr valign="top">
3281
     *                              <td><b>ENT_NOQUOTES</b></td>
3282
     *                              <td>Will leave both double and single quotes unconverted.</td>
3283
     *                              </tr>
3284
     *                              <tr valign="top">
3285
     *                              <td><b>ENT_IGNORE</b></td>
3286
     *                              <td>
3287
     *                              Silently discard invalid code unit sequences instead of returning
3288
     *                              an empty string. Using this flag is discouraged as it
3289
     *                              may have security implications.
3290
     *                              </td>
3291
     *                              </tr>
3292
     *                              <tr valign="top">
3293
     *                              <td><b>ENT_SUBSTITUTE</b></td>
3294
     *                              <td>
3295
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
3296
     *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty
3297
     *                              string.
3298
     *                              </td>
3299
     *                              </tr>
3300
     *                              <tr valign="top">
3301
     *                              <td><b>ENT_DISALLOWED</b></td>
3302
     *                              <td>
3303
     *                              Replace invalid code points for the given document type with a
3304
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
3305
     *                              (otherwise) instead of leaving them as is. This may be useful, for
3306
     *                              instance, to ensure the well-formedness of XML documents with
3307
     *                              embedded external content.
3308
     *                              </td>
3309
     *                              </tr>
3310
     *                              <tr valign="top">
3311
     *                              <td><b>ENT_HTML401</b></td>
3312
     *                              <td>
3313
     *                              Handle code as HTML 4.01.
3314
     *                              </td>
3315
     *                              </tr>
3316
     *                              <tr valign="top">
3317
     *                              <td><b>ENT_XML1</b></td>
3318
     *                              <td>
3319
     *                              Handle code as XML 1.
3320
     *                              </td>
3321
     *                              </tr>
3322
     *                              <tr valign="top">
3323
     *                              <td><b>ENT_XHTML</b></td>
3324
     *                              <td>
3325
     *                              Handle code as XHTML.
3326
     *                              </td>
3327
     *                              </tr>
3328
     *                              <tr valign="top">
3329
     *                              <td><b>ENT_HTML5</b></td>
3330
     *                              <td>
3331
     *                              Handle code as HTML 5.
3332
     *                              </td>
3333
     *                              </tr>
3334
     *                              </table>
3335
     *                              </p>
3336
     * @param string $encoding      [optional] <p>
3337
     *                              Defines encoding used in conversion.
3338
     *                              </p>
3339
     *                              <p>
3340
     *                              For the purposes of this function, the encodings
3341
     *                              ISO-8859-1, ISO-8859-15,
3342
     *                              UTF-8, cp866,
3343
     *                              cp1251, cp1252, and
3344
     *                              KOI8-R are effectively equivalent, provided the
3345
     *                              <i>string</i> itself is valid for the encoding, as
3346
     *                              the characters affected by <b>htmlspecialchars</b> occupy
3347
     *                              the same positions in all of these encodings.
3348
     *                              </p>
3349
     * @param bool   $double_encode [optional] <p>
3350
     *                              When <i>double_encode</i> is turned off PHP will not
3351
     *                              encode existing html entities, the default is to convert everything.
3352
     *                              </p>
3353
     *
3354
     * @psalm-pure
3355
     *
3356
     * @return string <p>the converted string.
3357
     *                </p>
3358
     *                <p>
3359
     *                If the input <i>string</i> contains an invalid code unit
3360
     *                sequence within the given <i>encoding</i> an empty string
3361
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
3362
     *                <b>ENT_SUBSTITUTE</b> flags are set.</p>
3363
     */
3364 8
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // Only "UTF-8" and "CP850" skip the normalization step.
        $needs_normalization = !\in_array($encoding, ['UTF-8', 'CP850'], true);
        if ($needs_normalization) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }
3381
3382
    /**
3383
     * Checks whether iconv is available on the server.
3384
     *
3385
     * @psalm-pure
3386
     *
3387
     * @return bool
3388
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
3389
     */
3390
    public static function iconv_loaded(): bool
    {
        // Asks the runtime whether the "iconv" extension is loaded.
        $is_available = \extension_loaded('iconv');

        return $is_available;
    }
3394
3395
    /**
3396
     * alias for "UTF8::decimal_to_chr()"
3397
     *
3398
     * @param int|string $int
3399
     *
3400
     * @psalm-param int|numeric-string $int
3401
     *
3402
     * @psalm-pure
3403
     *
3404
     * @return string
3405
     *
3406
     * @see        UTF8::decimal_to_chr()
3407
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
3408
     */
3409 4
    public static function int_to_chr($int): string
    {
        // Deprecated alias: forwards the argument unchanged to decimal_to_chr().
        $chr = self::decimal_to_chr($int);

        return $chr;
    }
3413
3414
    /**
3415
     * Converts Integer to hexadecimal U+xxxx code point representation.
3416
     *
3417
     * INFO: opposite to UTF8::hex_to_int()
3418
     *
3419
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
3420
     *
3421
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
3422
     * @param string $prefix [optional]
3423
     *
3424
     * @psalm-pure
3425
     *
3426
     * @return string the code point, or empty string on failure
3427
     */
3428 6
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        // Left-pad with zeros so that at least four hex digits are emitted;
        // longer values are kept at their full length.
        $digits = \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);

        return $prefix . $digits;
    }
3436
3437
    /**
3438
     * Checks whether intl-char is available on the server.
3439
     *
3440
     * @psalm-pure
3441
     *
3442
     * @return bool
3443
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
3444
     */
3445
    public static function intlChar_loaded(): bool
    {
        // Availability is detected through the presence of the "IntlChar" class.
        $is_available = \class_exists('IntlChar');

        return $is_available;
    }
3449
3450
    /**
3451
     * Checks whether intl is available on the server.
3452
     *
3453
     * @psalm-pure
3454
     *
3455
     * @return bool
3456
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
3457
     */
3458 5
    public static function intl_loaded(): bool
    {
        // Asks the runtime whether the "intl" extension is loaded.
        $is_available = \extension_loaded('intl');

        return $is_available;
    }
3462
3463
    /**
3464
     * alias for "UTF8::is_ascii()"
3465
     *
3466
     * @param string $str
3467
     *
3468
     * @psalm-pure
3469
     *
3470
     * @return bool
3471
     *
3472
     * @see        UTF8::is_ascii()
3473
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
3474
     */
3475 2
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: delegates directly to the ASCII helper class.
        $result = ASCII::is_ascii($str);

        return $result;
    }
3479
3480
    /**
3481
     * alias for "UTF8::is_base64()"
3482
     *
3483
     * @param string $str
3484
     *
3485
     * @psalm-pure
3486
     *
3487
     * @return bool
3488
     *
3489
     * @see        UTF8::is_base64()
3490
     * @deprecated <p>please use "UTF8::is_base64()"</p>
3491
     */
3492 2
    public static function isBase64($str): bool
    {
        // Deprecated alias: forwards to is_base64() with its default options.
        $result = self::is_base64($str);

        return $result;
    }
3496
3497
    /**
3498
     * alias for "UTF8::is_binary()"
3499
     *
3500
     * @param int|string $str
3501
     * @param bool       $strict
3502
     *
3503
     * @psalm-pure
3504
     *
3505
     * @return bool
3506
     *
3507
     * @see        UTF8::is_binary()
3508
     * @deprecated <p>please use "UTF8::is_binary()"</p>
3509
     */
3510 4
    public static function isBinary($str, bool $strict = false): bool
    {
        // Deprecated alias: both arguments are handed to is_binary() unchanged.
        $result = self::is_binary($str, $strict);

        return $result;
    }
3514
3515
    /**
3516
     * alias for "UTF8::is_bom()"
3517
     *
3518
     * @param string $utf8_chr
3519
     *
3520
     * @psalm-pure
3521
     *
3522
     * @return bool
3523
     *
3524
     * @see        UTF8::is_bom()
3525
     * @deprecated <p>please use "UTF8::is_bom()"</p>
3526
     */
3527 2
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated alias: forwards to is_bom().
        $result = self::is_bom($utf8_chr);

        return $result;
    }
3531
3532
    /**
3533
     * alias for "UTF8::is_html()"
3534
     *
3535
     * @param string $str
3536
     *
3537
     * @psalm-pure
3538
     *
3539
     * @return bool
3540
     *
3541
     * @see        UTF8::is_html()
3542
     * @deprecated <p>please use "UTF8::is_html()"</p>
3543
     */
3544 2
    public static function isHtml(string $str): bool
    {
        // Deprecated alias: forwards to is_html().
        $result = self::is_html($str);

        return $result;
    }
3548
3549
    /**
3550
     * alias for "UTF8::is_json()"
3551
     *
3552
     * @param string $str
3553
     *
3554
     * @return bool
3555
     *
3556
     * @see        UTF8::is_json()
3557
     * @deprecated <p>please use "UTF8::is_json()"</p>
3558
     */
3559 1
    public static function isJson(string $str): bool
    {
        // Deprecated alias: forwards to is_json() with its default options.
        $result = self::is_json($str);

        return $result;
    }
3563
3564
    /**
3565
     * alias for "UTF8::is_utf16()"
3566
     *
3567
     * @param string $str
3568
     *
3569
     * @psalm-pure
3570
     *
3571
     * @return false|int
3572
     *                   <strong>false</strong> if it's not UTF-16,<br>
3573
     *                   <strong>1</strong> for UTF-16LE,<br>
3574
     *                   <strong>2</strong> for UTF-16BE
3575
     *
3576
     * @see        UTF8::is_utf16()
3577
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
3578
     */
3579 2
    public static function isUtf16($str)
    {
        // Deprecated alias: returns whatever is_utf16() reports for the input.
        $result = self::is_utf16($str);

        return $result;
    }
3583
3584
    /**
3585
     * alias for "UTF8::is_utf32()"
3586
     *
3587
     * @param string $str
3588
     *
3589
     * @psalm-pure
3590
     *
3591
     * @return false|int
3592
     *                   <strong>false</strong> if it's not UTF-32,
3593
     *                   <strong>1</strong> for UTF-32LE,
3594
     *                   <strong>2</strong> for UTF-32BE
3595
     *
3596
     * @see        UTF8::is_utf32()
3597
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
3598
     */
3599 2
    public static function isUtf32($str)
    {
        // Deprecated alias: returns whatever is_utf32() reports for the input.
        $result = self::is_utf32($str);

        return $result;
    }
3603
3604
    /**
3605
     * alias for "UTF8::is_utf8()"
3606
     *
3607
     * @param string $str
3608
     * @param bool   $strict
3609
     *
3610
     * @psalm-pure
3611
     *
3612
     * @return bool
3613
     *
3614
     * @see        UTF8::is_utf8()
3615
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
3616
     */
3617 17
    public static function isUtf8($str, bool $strict = false): bool
    {
        // Deprecated alias: both arguments are handed to is_utf8() unchanged.
        $result = self::is_utf8($str, $strict);

        return $result;
    }
3621
3622
    /**
3623
     * Returns true if the string contains only alphabetic chars, false otherwise.
3624
     *
3625
     * @param string $str <p>The input string.</p>
3626
     *
3627
     * @psalm-pure
3628
     *
3629
     * @return bool
3630
     *              <p>Whether or not $str contains only alphabetic chars.</p>
3631
     */
3632 10
    public static function is_alpha(string $str): bool
    {
        // Zero or more alphabetic characters, anchored at both ends.
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3641
3642
    /**
3643
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
3644
     *
3645
     * @param string $str <p>The input string.</p>
3646
     *
3647
     * @psalm-pure
3648
     *
3649
     * @return bool
3650
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
3651
     */
3652 13
    public static function is_alphanumeric(string $str): bool
    {
        // Zero or more alphanumeric characters, anchored at both ends.
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3661
3662
    /**
3663
     * Returns true if the string contains only punctuation chars, false otherwise.
3664
     *
3665
     * @param string $str <p>The input string.</p>
3666
     *
3667
     * @psalm-pure
3668
     *
3669
     * @return bool
3670
     *              <p>Whether or not $str contains only punctuation chars.</p>
3671
     */
3672 10
    public static function is_punctuation(string $str): bool
    {
        // Zero or more punctuation characters, anchored at both ends.
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }
3676
3677
    /**
3678
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
3679
     *
3680
     * @param string $str <p>The input string.</p>
3681
     *
3682
     * @psalm-pure
3683
     *
3684
     * @return bool
3685
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
3686
     */
3687 1
    public static function is_printable(string $str): bool
    {
        // The string is printable when stripping invisible characters changes nothing.
        $cleaned = self::remove_invisible_characters($str);

        return $cleaned === $str;
    }
3691
3692
    /**
3693
     * Checks if a string is 7 bit ASCII.
3694
     *
3695
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
3696
     *
3697
     * @param string $str <p>The string to check.</p>
3698
     *
3699
     * @psalm-pure
3700
     *
3701
     * @return bool
3702
     *              <p>
3703
     *              <strong>true</strong> if it is ASCII<br>
3704
     *              <strong>false</strong> otherwise
3705
     *              </p>
3706
     */
3707 8
    public static function is_ascii(string $str): bool
    {
        // The actual check lives in the ASCII helper class.
        $result = ASCII::is_ascii($str);

        return $result;
    }
3711
3712
    /**
3713
     * Returns true if the string is base64 encoded, false otherwise.
3714
     *
3715
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
3716
     *
3717
     * @param string|null $str                   <p>The input string.</p>
3718
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
3719
     *
3720
     * @psalm-pure
3721
     *
3722
     * @return bool
3723
     *              <p>Whether or not $str is base64 encoded.</p>
3724
     */
3725 16
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // An empty string only counts when the caller explicitly allows it.
        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // Non-string input (e.g. null) is never considered base64.
        if (!\is_string($str)) {
            return false;
        }

        // Strict decoding rejects characters outside the base64 alphabet.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        // Round trip: re-encoding must reproduce the input exactly.
        return \base64_encode($decoded) === $str;
    }
3743
3744
    /**
3745
     * Check if the input is binary... (this looks like a hack).
3746
     *
3747
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
3748
     *
3749
     * @param int|string $input
3750
     * @param bool       $strict
3751
     *
3752
     * @psalm-pure
3753
     *
3754
     * @return bool
3755
     */
3756 40
    public static function is_binary($input, bool $strict = false): bool
    {
        $data = (string) $input;
        if ($data === '') {
            return false;
        }

        // A string consisting solely of "0" and "1" is treated as binary notation.
        if (\preg_match('~^[01]+$~', $data)) {
            return true;
        }

        // Known binary file signatures, as reported by get_file_type().
        $file_type = self::get_file_type($data);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // Heuristic: more than a quarter of all bytes are NUL bytes.
        $byte_count = \strlen($data);
        $nul_count = \substr_count($data, "\x0", 0, $byte_count);
        if (($nul_count / $byte_count) > 0.25) {
            return true;
        }

        // The fileinfo-based check below only runs in strict mode.
        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection   PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $detected_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($data);

        return $detected_encoding === 'binary';
    }
3795
3796
    /**
3797
     * Check if the file is binary.
3798
     *
3799
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
3800
     *
3801
     * @param string $file
3802
     *
3803
     * @return bool
3804
     */
3805 6
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened, so there is nothing to inspect.
            return false;
        }

        // Only the first 512 bytes are sampled.
        $head = \fread($handle, 512);
        \fclose($handle);

        if ($head === false || $head === '') {
            return false;
        }

        // The sample is checked in strict mode.
        return self::is_binary($head, true);
    }
3822
3823
    /**
3824
     * Returns true if the string contains only whitespace chars, false otherwise.
3825
     *
3826
     * @param string $str <p>The input string.</p>
3827
     *
3828
     * @psalm-pure
3829
     *
3830
     * @return bool
3831
     *              <p>Whether or not $str contains only whitespace characters.</p>
3832
     */
3833 15
    public static function is_blank(string $str): bool
    {
        // Zero or more whitespace characters, anchored at both ends.
        $pattern = '^[[:space:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3842
3843
    /**
3844
     * Checks if the given string is equal to any "Byte Order Mark".
3845
     *
3846
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
3847
     *
3848
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
3849
     *
3850
     * @param string $str <p>The input string.</p>
3851
     *
3852
     * @psalm-pure
3853
     *
3854
     * @return bool
3855
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
3856
     */
3857 2
    public static function is_bom($str): bool
    {
        // The candidate BOM sequences are the keys of self::$BOM; the values are
        // not needed for this check. The keys are read by value: the previous
        // by-reference foreach over the shared static array bound a reference
        // into it although nothing was ever written.
        //
        // The strict flag keeps the original "===" semantics, so non-string
        // input simply yields false.
        return \in_array($str, \array_keys(self::$BOM), true);
    }
3868
3869
    /**
3870
     * Determine whether the string is considered to be empty.
3871
     *
3872
     * A variable is considered empty if it does not exist or if its value equals FALSE.
3873
     * empty() does not generate a warning if the variable does not exist.
3874
     *
3875
     * @param array|float|int|string $str
3876
     *
3877
     * @psalm-pure
3878
     *
3879
     * @return bool
3880
     *              <p>Whether or not $str is empty().</p>
3881
     */
3882 1
    public static function is_empty($str): bool
    {
        // For a parameter that is always defined, empty() is the same as a
        // negated boolean cast.
        return !$str;
    }
3886
3887
    /**
3888
     * Returns true if the string contains only hexadecimal chars, false otherwise.
3889
     *
3890
     * @param string $str <p>The input string.</p>
3891
     *
3892
     * @psalm-pure
3893
     *
3894
     * @return bool
3895
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
3896
     */
3897 13
    public static function is_hexadecimal(string $str): bool
    {
        // Zero or more hexadecimal digits, anchored at both ends.
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
3906
3907
    /**
3908
     * Check if the string contains any HTML tags.
3909
     *
3910
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
3911
     *
3912
     * @param string $str <p>The input string.</p>
3913
     *
3914
     * @psalm-pure
3915
     *
3916
     * @return bool
3917
     *              <p>Whether or not $str contains html elements.</p>
3918
     */
3919 3
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // hack for emoji support :/
        $encoded = self::emoji_encode($str);

        // Matches an opening, closing or self-closing tag with optional attributes.
        $tag_pattern = "/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u";

        // preg_match() returns 1 only when a tag was found (0 on no match, false on error).
        return \preg_match($tag_pattern, $encoded) === 1;
    }
3934
3935
    /**
3936
     * Check if $url is a correct URL.
3937
     *
3938
     * @param string $url
3939
     * @param bool   $disallow_localhost
3940
     *
3941
     * @psalm-pure
3942
     *
3943
     * @return bool
3944
     */
3945 1
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        // Only the http and https schemes are accepted (case-insensitive prefix check).
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // An IPv6 literal must be wrapped in brackets inside a URL
                    // (RFC 3986, section 3.2.2). Without these entries the
                    // loopback address "http://[::1]/" slipped past this list.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            // Reject anything that contains ".localhost" after the scheme.
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        // NOTE(review): the pattern is not anchored at the end, so it only validates the
        // "scheme://label.label" prefix of the URL - confirm that this is intended.
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Fall back to PHP's own URL validation for everything else.
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }
3990
3991
    /**
3992
     * Try to check if "$str" is a JSON-string.
3993
     *
3994
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
3995
     *
3996
     * @param string $str                                    <p>The input string.</p>
3997
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
3998
     *                                                       results.</p>
3999
     *
4000
     * @return bool
4001
     *              <p>Whether or not the $str is in JSON format.</p>
4002
     */
4003 42
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only acceptable when the input itself spells "null"
        // (compared case-insensitively via strtoupper()).
        $decoded_to_null = $decoded === null;
        if ($decoded_to_null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        // Optionally reject scalar results (numbers, strings, booleans, null).
        $is_container = \is_object($decoded) || \is_array($decoded);
        if ($only_array_or_object_results_are_valid && !$is_container) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }
4031
4032
    /**
4033
     * @param string $str <p>The input string.</p>
4034
     *
4035
     * @psalm-pure
4036
     *
4037
     * @return bool
4038
     *              <p>Whether or not $str contains only lowercase chars.</p>
4039
     */
4040 8
    public static function is_lowercase(string $str): bool
    {
        // Zero or more lowercase characters, anchored at both ends.
        $pattern = '^[[:lower:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }
4049
4050
    /**
4051
     * Returns true if the string is serialized, false otherwise.
4052
     *
4053
     * @param string $str <p>The input string.</p>
4054
     *
4055
     * @psalm-pure
4056
     *
4057
     * @return bool
4058
     *              <p>Whether or not $str is serialized.</p>
4059
     */
4060 7
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is the serialized form of false, so it cannot be told apart
        // from a failed unserialize() call and must be checked explicitly.
        if ($str === 'b:0;') {
            return true;
        }

        // SECURITY: the input may be untrusted, so never instantiate real
        // objects while probing it ("allowed_classes" => false turns them into
        // __PHP_Incomplete_Class, which still counts as "is serialized").
        // The silence operator hides the notice/warning for invalid input.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }
4072
4073
    /**
4074
     * Returns true if the string contains only upper case chars, false
4075
     * otherwise.
4076
     *
4077
     * @param string $str <p>The input string.</p>
4078
     *
4079
     * @psalm-pure
4080
     *
4081
     * @return bool
4082
     *              <p>Whether or not $str contains only upper case characters.</p>
4083
     */
4084 8
    public static function is_uppercase(string $str): bool
    {
        // the same POSIX-class pattern is used by both implementations
        $only_upper_pattern = '^[[:upper:]]*$';

        // fallback when mbstring is not flagged as available
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $only_upper_pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($only_upper_pattern, $str);
    }
4093
4094
    /**
4095
     * Check if the string is UTF-16.
4096
     *
4097
     * EXAMPLE: <code>
4098
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
4099
     * //
4100
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
4101
     * //
4102
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
4103
     * </code>
4104
     *
4105
     * @param string $str                       <p>The input string.</p>
4106
     * @param bool   $check_if_string_is_binary
4107
     *
4108
     * @psalm-pure
4109
     *
4110
     * @return false|int
4111
     *                   <strong>false</strong> if it's not UTF-16,<br>
4112
     *                   <strong>1</strong> for UTF-16LE,<br>
4113
     *                   <strong>2</strong> for UTF-16BE
4114
     */
4115 22
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        // bail out early when the (optional) binary check fails
        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // result of count_chars() for the raw input, computed lazily and
        // shared by both byte-order checks
        $str_chars = [];

        // score per candidate byte order, the higher score wins
        $matches = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            // the candidate only counts when a full round trip is loss-free
            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys are needed; iterating them by value avoids taking a
            // reference to the temporary returned by count_chars().
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$matches[$candidate];
                }
            }
        }

        // a tie (including "no candidate matched at all") means: not UTF-16
        if ($matches['UTF-16LE'] === $matches['UTF-16BE']) {
            return false;
        }

        return $matches['UTF-16LE'] > $matches['UTF-16BE'] ? 1 : 2;
    }
4187
4188
    /**
4189
     * Check if the string is UTF-32.
4190
     *
4191
     * EXAMPLE: <code>
4192
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
4193
     * //
4194
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
4195
     * //
4196
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
4197
     * </code>
4198
     *
4199
     * @param string $str                       <p>The input string.</p>
4200
     * @param bool   $check_if_string_is_binary
4201
     *
4202
     * @psalm-pure
4203
     *
4204
     * @return false|int
4205
     *                   <strong>false</strong> if it's not UTF-32,<br>
4206
     *                   <strong>1</strong> for UTF-32LE,<br>
4207
     *                   <strong>2</strong> for UTF-32BE
4208
     */
4209 20
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;

        // bail out early when the (optional) binary check fails
        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // result of count_chars() for the raw input, computed lazily and
        // shared by both byte-order checks
        $str_chars = [];

        // score per candidate byte order, the higher score wins
        $matches = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $test = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$test) {
                continue;
            }

            // the candidate only counts when a full round trip is loss-free
            $test2 = \mb_convert_encoding($test, $candidate, 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', $candidate);
            if ($test3 !== $test) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Only the keys are needed; iterating them by value avoids taking a
            // reference to the temporary returned by count_chars().
            foreach (\array_keys(self::count_chars($test3)) as $test3char) {
                if (\in_array($test3char, $str_chars, true)) {
                    ++$matches[$candidate];
                }
            }
        }

        // a tie (including "no candidate matched at all") means: not UTF-32
        if ($matches['UTF-32LE'] === $matches['UTF-32BE']) {
            return false;
        }

        return $matches['UTF-32LE'] > $matches['UTF-32BE'] ? 1 : 2;
    }
4281
4282
    /**
4283
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
4284
     *
4285
     * EXAMPLE: <code>
4286
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
4287
     * //
4288
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
4289
     * </code>
4290
     *
4291
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
4292
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
4293
     *
4294
     * @psalm-pure
4295
     *
4296
     * @return bool
4297
     */
4298 83
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (\is_array($str)) {
            // Iterate by value: nothing is modified here, and a by-reference
            // loop variable would stay bound to the last element afterwards.
            foreach ($str as $v) {
                // one failing element is enough to reject the whole array
                if (!self::is_utf8($v, $strict)) {
                    return false;
                }
            }

            return true;
        }

        return self::is_utf8_string((string) $str, $strict);
    }
4312
4313
    /**
4314
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4315
     * Decodes a JSON string
4316
     *
4317
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
4318
     *
4319
     * @see http://php.net/manual/en/function.json-decode.php
4320
     *
4321
     * @param string $json    <p>
4322
     *                        The <i>json</i> string being decoded.
4323
     *                        </p>
4324
     *                        <p>
4325
     *                        This function only works with UTF-8 encoded strings.
4326
     *                        </p>
4327
     *                        <p>PHP implements a superset of
4328
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4329
     *                        only supports these values when they are nested inside an array or an object.
4330
     *                        </p>
4331
     * @param bool   $assoc   [optional] <p>
4332
     *                        When <b>TRUE</b>, returned objects will be converted into
4333
     *                        associative arrays.
4334
     *                        </p>
4335
     * @param int    $depth   [optional] <p>
4336
     *                        User specified recursion depth.
4337
     *                        </p>
4338
     * @param int    $options [optional] <p>
4339
     *                        Bitmask of JSON decode options. Currently only
4340
     *                        <b>JSON_BIGINT_AS_STRING</b>
4341
     *                        is supported (default is to cast large integers as floats)
4342
     *                        </p>
4343
     *
4344
     * @psalm-pure
4345
     *
4346
     * @return mixed
4347
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
4348
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
4349
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
4350
     *               is deeper than the recursion limit.</p>
4351
     */
4352 43
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // run the input through the class filter before decoding
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $decoded = \json_decode($filtered_json, $assoc, $depth, $options);

        return $decoded;
    }
4367
4368
    /**
4369
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
4370
     * Returns the JSON representation of a value.
4371
     *
4372
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
4373
     *
4374
     * @see http://php.net/manual/en/function.json-encode.php
4375
     *
4376
     * @param mixed $value   <p>
4377
     *                       The <i>value</i> being encoded. Can be any type except
4378
     *                       a resource.
4379
     *                       </p>
4380
     *                       <p>
4381
     *                       All string data must be UTF-8 encoded.
4382
     *                       </p>
4383
     *                       <p>PHP implements a superset of
4384
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
4385
     *                       only supports these values when they are nested inside an array or an object.
4386
     *                       </p>
4387
     * @param int   $options [optional] <p>
4388
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
4389
     *                       <b>JSON_HEX_TAG</b>,
4390
     *                       <b>JSON_HEX_AMP</b>,
4391
     *                       <b>JSON_HEX_APOS</b>,
4392
     *                       <b>JSON_NUMERIC_CHECK</b>,
4393
     *                       <b>JSON_PRETTY_PRINT</b>,
4394
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
4395
     *                       <b>JSON_FORCE_OBJECT</b>,
4396
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
4397
     *                       constants is described on
4398
     *                       the JSON constants page.
4399
     *                       </p>
4400
     * @param int   $depth   [optional] <p>
4401
     *                       Set the maximum depth. Must be greater than zero.
4402
     *                       </p>
4403
     *
4404
     * @psalm-pure
4405
     *
4406
     * @return false|string
4407
     *                      A JSON encoded <strong>string</strong> on success or<br>
4408
     *                      <strong>FALSE</strong> on failure
4409
     */
4410 5
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // run the input through the class filter before encoding
        $filtered_value = self::filter($value);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $encoded = \json_encode($filtered_value, $options, $depth);

        return $encoded;
    }
4421
4422
    /**
4423
     * Checks whether JSON is available on the server.
4424
     *
4425
     * @psalm-pure
4426
     *
4427
     * @return bool
4428
     *              <strong>true</strong> if available, <strong>false</strong> otherwise
4429
     */
4430
    public static function json_loaded(): bool
    {
        // JSON support is detected via the presence of the decode function
        $has_json_decode = \function_exists('json_decode');

        return $has_json_decode;
    }
4434
4435
    /**
4436
     * Makes string's first char lowercase.
4437
     *
4438
     * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code>
4439
     *
4440
     * @param string      $str                           <p>The input string</p>
4441
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
4442
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4443
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4444
     *                                                   tr</p>
4445
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4446
     *                                                   -> ß</p>
4447
     *
4448
     * @psalm-pure
4449
     *
4450
     * @return string the resulting string
4451
     */
4452 46
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // every non "UTF-8" encoding name goes through the generic helpers
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower(
                $head,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $head = (string) \mb_substr($str, 0, 1);

        // plain mbstring lowercasing is only used when no language specific
        // handling and no length preservation was requested
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($head) . $tail;
        }

        return self::strtolower(
            $head,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        ) . $tail;
    }
4497
4498
    /**
4499
     * alias for "UTF8::lcfirst()"
4500
     *
4501
     * @param string      $str
4502
     * @param string      $encoding
4503
     * @param bool        $clean_utf8
4504
     * @param string|null $lang
4505
     * @param bool        $try_to_keep_the_string_length
4506
     *
4507
     * @psalm-pure
4508
     *
4509
     * @return string
4510
     *
4511
     * @see        UTF8::lcfirst()
4512
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
4513
     */
4514 2
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // deprecated alias: forwards all arguments unchanged to lcfirst()
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4529
4530
    /**
4531
     * Lowercase for all words in the string.
4532
     *
4533
     * @param string      $str                           <p>The input string.</p>
4534
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
4535
     * @param string      $char_list                     [optional] <p>Additional chars that contains to words and do
4536
     *                                                   not start a new word.</p>
4537
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
4538
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
4539
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
4540
     *                                                   tr</p>
4541
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
4542
     *                                                   -> ß</p>
4543
     *
4544
     * @psalm-pure
4545
     *
4546
     * @return string
4547
     */
4548 2
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against '' explicitly: a truthiness check would also treat
        // the valid input "0" as empty.
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: the words are only read, and a by-reference loop
        // variable would stay bound to the last element afterwards.
        foreach ($words as $word) {
            // skip empty chunks only, a "0" chunk must be kept
            if ((string) $word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            } else {
                // words listed in $exceptions are appended untouched
                $words_str .= $word;
            }
        }

        return $words_str;
    }
4583
4584
    /**
4585
     * alias for "UTF8::lcfirst()"
4586
     *
4587
     * @param string      $str
4588
     * @param string      $encoding
4589
     * @param bool        $clean_utf8
4590
     * @param string|null $lang
4591
     * @param bool        $try_to_keep_the_string_length
4592
     *
4593
     * @psalm-pure
4594
     *
4595
     * @return string
4596
     *
4597
     * @see        UTF8::lcfirst()
4598
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
4599
     */
4600 5
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // deprecated alias: forwards all arguments unchanged to lcfirst()
        $result = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $result;
    }
4615
4616
    /**
4617
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
4618
     *
4619
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白  '); // '中文空白  '</code>
4620
     *
4621
     * @param string      $str   <p>The string to be trimmed</p>
4622
     * @param string|null $chars <p>Optional characters to be stripped</p>
4623
     *
4624
     * @psalm-pure
4625
     *
4626
     * @return string the string with unwanted characters stripped from the left
4627
     */
4628 23
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // no delimiter is passed to preg_quote() in this branch
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: here "/" is additionally escaped
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }
4656
4657
    /**
4658
     * Returns the UTF-8 character with the maximum code point in the given data.
4659
     *
4660
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
4661
     *
4662
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
4663
     *
4664
     * @psalm-pure
4665
     *
4666
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
4667
     */
4668
    public static function max($arg)
    {
        // an array of strings is treated as one concatenated string
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $all_codepoints = self::codepoints($haystack);

        // no code points -> no maximum
        if ($all_codepoints === []) {
            return null;
        }

        return self::chr((int) \max($all_codepoints));
    }
4683
4684
    /**
4685
     * Calculates and returns the maximum number of bytes taken by any
4686
     * UTF-8 encoded character in the given string.
4687
     *
4688
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
4689
     *
4690
     * @param string $str <p>The original Unicode string.</p>
4691
     *
4692
     * @psalm-pure
4693
     *
4694
     * @return int
4695
     *             <p>Max byte lengths of the given chars.</p>
4696
     */
4697
    public static function max_chr_width(string $str): int
    {
        $size_list = self::chr_size_list($str);

        // an empty size list yields a width of 0
        if ($size_list === []) {
            return 0;
        }

        return (int) \max($size_list);
    }
4706
4707
    /**
4708
     * Checks whether mbstring is available on the server.
4709
     *
4710
     * @psalm-pure
4711
     *
4712
     * @return bool
4713
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
4714
     */
4715
    public static function mbstring_loaded(): bool
    {
        // ask PHP directly whether the "mbstring" extension is loaded
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }
4719
4720
    /**
4721
     * Returns the UTF-8 character with the minimum code point in the given data.
4722
     *
4723
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
4724
     *
4725
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
4726
     *
4727
     * @psalm-pure
4728
     *
4729
     * @return string|null
4730
     *                     <p>The character with the lowest code point than others, returns null on failure or empty input.</p>
4731
     */
4732
    public static function min($arg)
    {
        // an array of strings is treated as one concatenated string
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $all_codepoints = self::codepoints($haystack);

        // no code points -> no minimum
        if ($all_codepoints === []) {
            return null;
        }

        return self::chr((int) \min($all_codepoints));
    }
4747
4748
    /**
4749
     * alias for "UTF8::normalize_encoding()"
4750
     *
4751
     * @param mixed $encoding
4752
     * @param mixed $fallback
4753
     *
4754
     * @psalm-pure
4755
     *
4756
     * @return mixed
4757
     *
4758
     * @see        UTF8::normalize_encoding()
4759
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
4760
     */
4761
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        // deprecated alias: forwards both arguments to normalize_encoding()
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }
4765
4766
    /**
4767
     * Normalize the encoding-"name" input.
4768
     *
4769
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
4770
     *
4771
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
4772
     * @param mixed $fallback <p>e.g.: UTF-8</p>
4773
     *
4774
     * @psalm-pure
4775
     *
4776
     * @return mixed|string
4777
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return a empty string as fallback (by default)</p>
4778
     *
4779
     * @template TNormalizeEncodingFallback
4780
     * @psalm-param string|TNormalizeEncodingFallback $fallback
4781
     * @psalm-return string|TNormalizeEncodingFallback
4782
     */
4783
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * Memo of already resolved names: raw input => normalized name.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        $encoding = (string) $encoding;

        // '' and '0' are the only falsy strings in PHP: both mean "no encoding given".
        if ($encoding === '' || $encoding === '0') {
            return $fallback;
        }

        // Exact-match fast path for the most frequent spellings (no cache, no data file).
        $fast_path = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($fast_path[$encoding])) {
            return $fast_path[$encoding];
        }

        // only a fallback, for non "strict_types" usage (e.g. a bool that was cast to string) ...
        if ($encoding === '1') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        // Lazy-load the list of known encoding names.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // Already a known name: keep it exactly as given.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        // Unknown spelling: upper-case it and look up an alias keyed by the
        // name with everything except ASCII letters and digits removed.
        $normalized = \strtoupper($encoding);
        $alias_key = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $normalized);

        $aliases = [
            'ISO8859'   => 'ISO-8859-1',
            'ISO88591'  => 'ISO-8859-1',
            'ISO'       => 'ISO-8859-1',
            'LATIN'     => 'ISO-8859-1',
            'LATIN1'    => 'ISO-8859-1', // Western European
            'ISO88592'  => 'ISO-8859-2',
            'LATIN2'    => 'ISO-8859-2', // Central European
            'ISO88593'  => 'ISO-8859-3',
            'LATIN3'    => 'ISO-8859-3', // Southern European
            'ISO88594'  => 'ISO-8859-4',
            'LATIN4'    => 'ISO-8859-4', // Northern European
            'ISO88595'  => 'ISO-8859-5', // Cyrillic
            'ISO88596'  => 'ISO-8859-6', // Arabic
            'ISO88597'  => 'ISO-8859-7', // Greek
            'ISO88598'  => 'ISO-8859-8', // Hebrew
            'ISO88599'  => 'ISO-8859-9',
            'LATIN5'    => 'ISO-8859-9', // Turkish
            'ISO885911' => 'ISO-8859-11',
            'TIS620'    => 'ISO-8859-11', // Thai
            'ISO885910' => 'ISO-8859-10',
            'LATIN6'    => 'ISO-8859-10', // Nordic
            'ISO885913' => 'ISO-8859-13',
            'LATIN7'    => 'ISO-8859-13', // Baltic
            'ISO885914' => 'ISO-8859-14',
            'LATIN8'    => 'ISO-8859-14', // Celtic
            'ISO885915' => 'ISO-8859-15',
            'LATIN9'    => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916' => 'ISO-8859-16',
            'LATIN10'   => 'ISO-8859-16', // Southeast European
            'UTF16'     => 'UTF-16',
            'UTF32'     => 'UTF-32',
            'UTF8'      => 'UTF-8',
            'UTF'       => 'UTF-8',
            'UTF7'      => 'UTF-7',
            '8BIT'      => 'CP850',
            'BINARY'    => 'CP850',
        ];

        // CP125x / WIN125x / WINDOWS125x => WINDOWS-125x (1250 up to 1258)
        for ($code_page = 1250; $code_page <= 1258; ++$code_page) {
            $target = 'WINDOWS-' . $code_page;
            $aliases['CP' . $code_page] = $target;
            $aliases['WIN' . $code_page] = $target;
            $aliases['WINDOWS' . $code_page] = $target;
        }

        if (isset($aliases[$alias_key])) {
            $normalized = $aliases[$alias_key];
        }

        // Cached under the raw input, so the next call with the same spelling is a single lookup.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

        return $normalized;
    }
4931
4932
    /**
4933
     * Standardize line ending to unix-like.
4934
     *
4935
     * @param string          $str      <p>The input string.</p>
4936
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL
4937
     *                                  here.</p>
4938
     *
4939
     * @psalm-pure
4940
     *
4941
     * @return string
4942
     *                <p>A string with normalized line ending.</p>
4943
     */
4944
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        // The three line-ending flavours, in the same order the replacer array is matched against.
        $line_endings = ["\r\n", "\r", "\n"];

        // A string replacer is used for every flavour; an array replacer is
        // paired by position (missing positions become '', like str_replace()).
        $replacements = \is_array($replacer)
            ? \array_values($replacer)
            : [$replacer, $replacer, $replacer];

        $pairs = [];
        foreach ($line_endings as $index => $line_ending) {
            $pairs[$line_ending] = (string) ($replacements[$index] ?? '');
        }

        // strtr() matches the longest key first and never re-scans text it has
        // already replaced. The former str_replace() call processed its own
        // output again, so a "\r\n" replacer turned "a\r\nb" into "a\r\r\n\r\nb".
        return \strtr($str, $pairs);
    }
4948
4949
    /**
4950
     * Normalize some MS Word special characters.
4951
     *
4952
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
4953
     *
4954
     * @param string $str <p>The string to be normalized.</p>
4955
     *
4956
     * @psalm-pure
4957
     *
4958
     * @return string
4959
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
4960
     */
4961
    public static function normalize_msword(string $str): string
    {
        // Pure delegation: the actual character mapping lives in the ASCII helper class.
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }
4965
4966
    /**
4967
     * Normalize the whitespace.
4968
     *
4969
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
4970
     *
4971
     * @param string $str                        <p>The string to be normalized.</p>
4972
     * @param bool   $keep_non_breaking_space    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
4973
     * @param bool   $keep_bidi_unicode_controls [optional] <p>Set to true, to keep non-printable (for the web)
4974
     *                                           bidirectional text chars.</p>
4975
     *
4976
     * @psalm-pure
4977
     *
4978
     * @return string
4979
     *                <p>A string with normalized whitespace.</p>
4980
     */
4981
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false
    ): string {
        // Pure delegation: all three arguments are handed to the ASCII helper unchanged.
        $normalized = ASCII::normalize_whitespace($str, $keep_non_breaking_space, $keep_bidi_unicode_controls);

        return $normalized;
    }
4992
4993
    /**
4994
     * Calculates Unicode code point of the given UTF-8 encoded character.
4995
     *
4996
     * INFO: opposite to UTF8::chr()
4997
     *
4998
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
4999
     *
5000
     * @param string $chr      <p>The character of which to calculate code point.</p>
5001
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
5002
     *
5003
     * @psalm-pure
5004
     *
5005
     * @return int
5006
     *             <p>Unicode code point of the given character,<br>
5007
     *             0 on invalid UTF-8 byte sequence</p>
5008
     */
5009
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * Memo: "<input>_<encoding>" => code point.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        $chr = (string) $chr;

        // 'UTF-8' and 'CP850' are taken as-is, every other name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $memo_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$memo_key])) {
            return $CHAR_CACHE[$memo_key];
        }

        // Still not UTF-8 after normalization: run the input through self::encode().
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // 1) lookup table (lazy-loaded)
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$memo_key] = self::$ORD[$chr];
        }

        // 2) fallback via "IntlChar"; a falsy result (null / 0) falls through to step 3
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            if ($intl_code) {
                return $CHAR_CACHE[$memo_key] = $intl_code;
            }
        }

        // 3) fallback via vanilla php: decode by hand from at most the first four bytes
        //    (unpack() with "C*" yields a 1-based array of byte values)
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        $lead_byte = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $code = (int) ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $code = (int) ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $code = (int) ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte, or a lead byte without enough continuation bytes
            $code = $lead_byte;
        }

        return $CHAR_CACHE[$memo_key] = $code;
    }
5087
5088
    /**
5089
     * Parses the string into an array (into the second parameter).
5090
     *
5091
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
5092
     *          if the second parameter is not set!
5093
     *
5094
     * EXAMPLE: <code>
5095
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
5096
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
5097
     * </code>
5098
     *
5099
     * @see http://php.net/manual/en/function.parse-str.php
5100
     *
5101
     * @param string $str        <p>The input string.</p>
5102
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
5103
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5104
     *
5105
     * @psalm-pure
5106
     *
5107
     * @return bool
5108
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
5109
     */
5110
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        $query = $clean_utf8 ? self::clean($str) : $str;

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($query, $result);

            return $result !== [];
        }

        // mbstring path: a failed parse is a failure, an empty result as well.
        if (\mb_parse_str($query, $result) === false) {
            return false;
        }

        return $result !== [];
    }
5129
5130
    /**
5131
     * Checks if \u modifier is available that enables Unicode support in PCRE.
5132
     *
5133
     * @psalm-pure
5134
     *
5135
     * @return bool
5136
     *              <p>
5137
     *              <strong>true</strong> if support is available,<br>
5138
     *              <strong>false</strong> otherwise
5139
     *              </p>
5140
     */
5141
    public static function pcre_utf8_support(): bool
    {
        // Probe: an empty pattern with the "u" modifier against an empty subject.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe_result = @\preg_match('//u', '');

        // preg_match() gives 1 for a match and false on failure.
        return (bool) $probe_result;
    }
5146
5147
    /**
5148
     * Create an array containing a range of UTF-8 characters.
5149
     *
5150
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
5151
     *
5152
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
5153
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
5154
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
5155
     *                              "is_numeric"</p>
5156
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
5157
     * @param float|int  $step      [optional] <p>
5158
     *                              If a step value is given, it will be used as the
5159
     *                              increment between elements in the sequence. step
5160
     *                              should be given as a positive number. If not specified,
5161
     *                              step will default to 1.
5162
     *                              </p>
5163
     *
5164
     * @psalm-pure
5165
     *
5166
     * @return string[]
5167
     */
5168
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        // Falsy boundaries ('', '0', 0, null, ...) produce an empty range.
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        // The ctype functions are always fed strings. Passing an int makes them
        // read -128..255 as an ASCII code (e.g. 71 would be tested as "G"), and
        // non-string arguments are deprecated since PHP 8.1. The digit check was
        // already cast; the hex-digit check was not.
        $var1_string = (string) $var1;
        $var2_string = (string) $var2;

        $is_digit = false;
        $is_xdigit = false;

        // The interpretation (decimal / hexadecimal) is decided once, from BOTH boundaries.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit($var1_string) && \ctype_digit($var2_string)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit($var1_string) && \ctype_xdigit($var2_string)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int($var1_string);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // Neither numeric nor hexadecimal: treat the value as a character.
            $start = self::ord($var1_string);
        }

        if (!$start) {
            return [];
        }

        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int($var2_string);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord($var2_string);
        }

        if (!$end) {
            return [];
        }

        // Native range() handles ascending as well as descending sequences.
        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }
5241
5242
    /**
5243
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
5244
     *
5245
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
5246
     *
5247
     * e.g:
5248
     * 'test+test'                     => 'test+test'
5249
     * 'D&#252;sseldorf'               => 'Düsseldorf'
5250
     * 'D%FCsseldorf'                  => 'Düsseldorf'
5251
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
5252
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
5253
     * 'Düsseldorf'                   => 'Düsseldorf'
5254
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
5255
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
5256
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
5257
     *
5258
     * @param string $str          <p>The input string.</p>
5259
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
5260
     *
5261
     * @psalm-pure
5262
     *
5263
     * @return string
5264
     *                <p>The decoded URL, as a string.</p>
5265
     */
5266
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // Without any of these markers there is nothing to decode.
        $needs_decoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needs_decoding = true;

                break;
            }
        }

        if (!$needs_decoding) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // One pass = to_utf8 -> html_entity_decode -> rawurldecode -> fix_simple_utf8.
        // With $multi_decode the pass is repeated until the string stops changing,
        // otherwise it runs exactly once.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \rawurldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return $str;
    }
5318
5319
    /**
5320
     * Replaces all occurrences of $pattern in $str by $replacement.
5321
     *
5322
     * @param string $str         <p>The input string.</p>
5323
     * @param string $pattern     <p>The regular expression pattern.</p>
5324
     * @param string $replacement <p>The string to replace with.</p>
5325
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
5326
     * @param string $delimiter   [optional] <p>Delimiter for the regex. Default: '/'</p>
5327
     *
5328
     * @psalm-pure
5329
     *
5330
     * @return string
5331
     */
5332
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The option string 'msr' is reduced to 'ms' before it is appended to the regex.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback: a falsy delimiter ('' or '0') is replaced by '/'
        $boundary = $delimiter ?: '/';

        // The "u" modifier is always added in front of the caller's modifiers.
        $regex = $boundary . $pattern . $boundary . 'u' . $modifiers;

        // preg_replace() yields null on error, which the cast turns into ''.
        return (string) \preg_replace($regex, $replacement, $str);
    }
5354
5355
    /**
5356
     * alias for "UTF8::remove_bom()"
5357
     *
5358
     * @param string $str
5359
     *
5360
     * @psalm-pure
5361
     *
5362
     * @return string
5363
     *
5364
     * @see        UTF8::remove_bom()
5365
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
5366
     */
5367
    public static function removeBOM(string $str): string
    {
        // Deprecated camelCase alias: forwards to remove_bom().
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }
5371
5372
    /**
5373
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
5374
     *
5375
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
5376
     *
5377
     * @param string $str <p>The input string.</p>
5378
     *
5379
     * @psalm-pure
5380
     *
5381
     * @return string
5382
     *                <p>A string without UTF-BOM.</p>
5383
     */
5384
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_bytes = \strlen($str);

        // self::$BOM maps each BOM byte string to its length in bytes. The loop
        // does not stop at the first hit, so every entry is tested against the
        // (possibly already shortened) string.
        foreach (self::$BOM as $marker => $marker_size) {
            if (\strpos($str, $marker) !== 0) {
                continue;
            }

            /** @var false|string $stripped - needed for PhpStan (stubs error) */
            $stripped = \substr($str, $marker_size, $remaining_bytes);
            if ($stripped === false) {
                return '';
            }

            $remaining_bytes -= (int) $marker_size;
            $str = (string) $stripped;
        }

        return $str;
    }
5407
5408
    /**
5409
     * Removes duplicate occurrences of a string in another string.
5410
     *
5411
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
5412
     *
5413
     * @param string          $str  <p>The base string.</p>
5414
     * @param string|string[] $what <p>String to search for in the base string.</p>
5415
     *
5416
     * @psalm-pure
5417
     *
5418
     * @return string
5419
     *                <p>A string with removed duplicates.</p>
5420
     */
5421
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        // A single needle is handled as a one-element list.
        if (\is_string($what)) {
            $what = [$what];
        }

        // Anything that is neither a string nor an array leaves the input untouched.
        if (!\is_array($what)) {
            return $str;
        }

        foreach ($what as $item) {
            // Group 1 always holds exactly one (literal, quoted) copy of $item,
            // so the run is collapsed with '$1'. Using $item itself as the
            // replacement was wrong for needles containing '$0', '\0' or '\\':
            // preg_replace() interprets those as back-references / escapes.
            $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
        }

        return $str;
    }
5438
5439
    /**
5440
     * Remove html via "strip_tags()" from the string.
5441
     *
5442
     * @param string $str            <p>The input string.</p>
5443
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
5444
     *                               should not be stripped. Default: null
5445
     *                               </p>
5446
     *
5447
     * @psalm-pure
5448
     *
5449
     * @return string
5450
     *                <p>A string without html tags.</p>
5451
     */
5452
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        // Thin wrapper around the native strip_tags(); $allowable_tags lists the tags to keep.
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }
5456
5457
    /**
5458
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
5459
     *
5460
     * @param string $str         <p>The input string.</p>
5461
     * @param string $replacement [optional] <p>Default is an empty string.</p>
5462
     *
5463
     * @psalm-pure
5464
     *
5465
     * @return string
5466
     *                <p>A string without breaks.</p>
5467
     */
5468
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // Alternatives, tried left to right: CRLF, lone CR, lone LF, any "<br ...>" tag.
        // Modifiers: i = case-insensitive, s = "." also matches newlines,
        //            U = ungreedy, so "<br.*>" stops at the first ">".
        //
        // The first alternative used to be "/\r\n" (stray slash). It only matched a
        // CRLF preceded by a literal "/" and deleted that slash, while a plain CRLF
        // fell through to the "\r" and "\n" alternatives and was replaced twice.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }
5472
5473
    /**
5474
     * Remove invisible characters from a string.
5475
     *
5476
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
5477
     *
5478
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
5479
     *
5480
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
5481
     *
5482
     * @param string $str         <p>The input string.</p>
5483
     * @param bool   $url_encoded [optional] <p>
5484
     *                            Try to remove url encoded control character.
5485
     *                            WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
5486
     *                            <br>
5487
     *                            Default: false
5488
     *                            </p>
5489
     * @param string $replacement [optional] <p>The replacement character.</p>
5490
     *
5491
     * @psalm-pure
5492
     *
5493
     * @return string
5494
     *                <p>A string without invisible chars.</p>
5495
     */
5496
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = ''
    ): string {
        // Pure delegation: the removal itself is implemented in the ASCII helper class.
        $visible_only = ASCII::remove_invisible_characters($str, $url_encoded, $replacement);

        return $visible_only;
    }
5507
5508
    /**
5509
     * Returns a new string with the prefix $substring removed, if present.
5510
     *
5511
     * @param string $str       <p>The input string.</p>
5512
     * @param string $substring <p>The prefix to remove.</p>
5513
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5514
     *
5515
     * @psalm-pure
5516
     *
5517
     * @return string
5518
     *                <p>A string without the prefix $substring.</p>
5519
     */
5520
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Explicit comparison with '' instead of a truthiness test: the string '0'
        // is falsy in PHP, so a prefix of '0' was never removed before.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        // Fast path for UTF-8: cut off as many characters as the prefix has.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }
5545
5546
    /**
5547
     * Returns a new string with the suffix $substring removed, if present.
5548
     *
5549
     * @param string $str
5550
     * @param string $substring <p>The suffix to remove.</p>
5551
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
5552
     *
5553
     * @psalm-pure
5554
     *
5555
     * @return string
5556
     *                <p>A string having a $str without the suffix $substring.</p>
5557
     */
5558
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Explicit comparison with '' instead of a truthiness test: the string '0'
        // is falsy in PHP, so a suffix of '0' was never removed before.
        // The suffix test itself is byte-wise: the last strlen($substring) bytes must match.
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        // Fast path for UTF-8: keep everything except the trailing suffix characters.
        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }
5584
5585
    /**
5586
     * Replaces all occurrences of $search in $str by $replacement.
5587
     *
5588
     * @param string $str            <p>The input string.</p>
5589
     * @param string $search         <p>The needle to search for.</p>
5590
     * @param string $replacement    <p>The string to replace with.</p>
5591
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5592
     *
5593
     * @psalm-pure
5594
     *
5595
     * @return string
5596
     *                <p>A string with replaced parts.</p>
5597
     */
5598
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-sensitive: native byte-wise str_replace();
        // case-insensitive: the class' own str_ireplace().
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }
5610
5611
    /**
5612
     * Replaces all occurrences of $search in $str by $replacement.
5613
     *
5614
     * @param string       $str            <p>The input string.</p>
5615
     * @param array        $search         <p>The elements to search for.</p>
5616
     * @param array|string $replacement    <p>The string to replace with.</p>
5617
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5618
     *
5619
     * @psalm-pure
5620
     *
5621
     * @return string
5622
     *                <p>A string with replaced parts.</p>
5623
     */
5624
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-insensitive requests go to the class' own str_ireplace().
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        // Default: native str_replace() with the list of needles.
        return \str_replace($search, $replacement, $str);
    }
5636
5637
    /**
5638
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
5639
     *
5640
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
5641
     *
5642
     * @param string $str                        <p>The input string</p>
5643
     * @param string $replacement_char           <p>The replacement character.</p>
5644
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
5645
     *
5646
     * @psalm-pure
5647
     *
5648
     * @return string
5649
     *                <p>A string without diamond question marks (�).</p>
5650
     */
5651
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $save = \mb_substitute_character();

            try {
                // mb_substitute_character() only accepts 'none', 'long', 'entity' or an
                // integer code point. Passing the raw replacement string throws a
                // ValueError on PHP 8 and was ignored (with a warning) on PHP 7.
                // So invalid byte sequences are either dropped ('none') or marked
                // with U+FFFD, which the str_replace() below turns into the
                // caller's replacement (of any length).
                \mb_substitute_character($replacement_char === '' ? 'none' : 0xFFFD);

                // the polyfill maybe return false, so cast to string
                $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            } finally {
                // Always restore the previous process-wide substitute character.
                \mb_substitute_character($save);
            }
        }

        // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD (the diamond question mark).
        return \str_replace("\xEF\xBF\xBD", $replacement_char, $str);
    }
5695
5696
    /**
5697
     * Strip whitespace or other characters from the end of a UTF-8 string.
5698
     *
5699
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'</code>
5700
     *
5701
     * @param string      $str   <p>The string to be trimmed.</p>
5702
     * @param string|null $chars <p>Optional characters to be stripped.</p>
5703
     *
5704
     * @psalm-pure
5705
     *
5706
     * @return string
5707
     *                <p>A string with unwanted characters stripped from the right.</p>
5708
     */
5709
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list means "strip nothing" (like the native rtrim()).
        // Without this guard the pattern would be the invalid character class
        // "[]+$" and the failed regex result would be cast to an empty string.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // "mb_ereg_replace()" needs no delimiter, so none is quoted here
                /** @noinspection PregQuoteUsageInspection */
                $pattern = '[' . \preg_quote($chars) . ']+$';
            } else {
                $pattern = '[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without "mbstring": the pattern is passed to "regex_replace()",
        // therefore the "/" delimiter is quoted as well
        if ($chars !== null) {
            $pattern = '[' . \preg_quote($chars, '/') . ']+$';
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
5737
5738
    /**
5739
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
5740
     *
5741
     * @param bool $useEcho
5742
     *
5743
     * @psalm-pure
5744
     *
5745
     * @return string
5746
     */
5747
    public static function showSupport(bool $useEcho = true)
    {
        // one "key - value" line per detected feature, wrapped in a <pre> block
        $html = '<pre>';
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        // the markup is returned in any case, even when it was already printed
        return $html;
    }
5765
5766
    /**
5767
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
5768
     *
5769
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
5770
     *
5771
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
5772
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
5773
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
5774
     *
5775
     * @psalm-pure
5776
     *
5777
     * @return string
5778
     *                <p>The HTML numbered entity for the given character.</p>
5779
     */
5780
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is passed through untouched when the caller asked for it
        $is_kept_ascii = $keep_ascii_chars && ASCII::is_ascii($char);
        if ($is_kept_ascii) {
            return $char;
        }

        $code_point = self::ord($char, $encoding);

        return '&#' . $code_point . ';';
    }
5799
5800
    /**
5801
     * @param string $str
5802
     * @param int    $tab_length
5803
     *
5804
     * @psalm-pure
5805
     *
5806
     * @return string
5807
     */
5808
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // a run of "$tab_length" blanks is what gets collapsed into one tab
        $blank_run = \str_repeat(' ', $tab_length);

        return \str_replace($blank_run, "\t", $str);
    }
5820
5821
    /**
5822
     * alias for "UTF8::str_split()"
5823
     *
5824
     * @param int|string $str
5825
     * @param int        $length
5826
     * @param bool       $clean_utf8
5827
     *
5828
     * @psalm-pure
5829
     *
5830
     * @return string[]
5831
     *
5832
     * @see        UTF8::str_split()
5833
     * @deprecated <p>please use "UTF8::str_split()"</p>
5834
     */
5835
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        // deprecated alias: all arguments are forwarded unchanged
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }
5843
5844
    /**
5845
     * alias for "UTF8::str_starts_with()"
5846
     *
5847
     * @param string $haystack
5848
     * @param string $needle
5849
     *
5850
     * @psalm-pure
5851
     *
5852
     * @return bool
5853
     *
5854
     * @see        UTF8::str_starts_with()
5855
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
5856
     */
5857
    public static function str_begins(string $haystack, string $needle): bool
    {
        // deprecated alias: forwards to the canonical method
        $starts_with = self::str_starts_with($haystack, $needle);

        return $starts_with;
    }
5861
5862
    /**
5863
     * Returns a camelCase version of the string. Trims surrounding spaces,
5864
     * capitalizes letters following digits, spaces, dashes and underscores,
5865
     * and removes spaces, dashes, as well as underscores.
5866
     *
5867
     * @param string      $str                           <p>The input string.</p>
5868
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
5869
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
5870
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
5871
     *                                                   tr</p>
5872
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
5873
     *                                                   -> ß</p>
5874
     *
5875
     * @psalm-pure
5876
     *
5877
     * @return string
5878
     */
5879
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // lower-case the first char of the trimmed input, then drop leading dashes / underscores
        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // the plain "mb_*" functions are only used when no language specific handling is requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Upper-case helper shared by both callbacks below.
         *
         * @param string $chunk
         * @param bool   $clean
         *
         * @psalm-pure
         *
         * @return string
         */
        $to_upper = static function (string $chunk, bool $clean) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($chunk, $encoding, $clean, $lang, $try_to_keep_the_string_length);
            }

            return $encoding === 'UTF-8'
                ? \mb_strtoupper($chunk)
                : \mb_strtoupper($chunk, $encoding);
        };

        // 1.) remove each run of dashes / underscores / whitespace and upper-case the char that follows it
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper): string {
                return isset($match[1]) ? $to_upper($match[1], false) : '';
            },
            $str
        );

        // 2.) upper-case each run of digits together with the char that follows it
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($to_upper, $clean_utf8): string {
                return $to_upper($match[0], $clean_utf8);
            },
            $str
        );
    }
5955
5956
    /**
5957
     * Returns the string with the first letter of each word capitalized,
5958
     * except for when the word is a name which shouldn't be capitalized.
5959
     *
5960
     * @param string $str
5961
     *
5962
     * @psalm-pure
5963
     *
5964
     * @return string
5965
     *                <p>A string with $str capitalized.</p>
5966
     */
5967
    public static function str_capitalize_name(string $str): string
    {
        // first pass: parts separated by a space, second pass: parts separated by a dash
        $collapsed = self::collapse_whitespace($str);
        $space_separated = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($space_separated, '-');
    }
5977
5978
    /**
5979
     * Returns true if the string contains $needle, false otherwise. By default
5980
     * the comparison is case-sensitive, but can be made insensitive by setting
5981
     * $case_sensitive to false.
5982
     *
5983
     * @param string $haystack       <p>The input string.</p>
5984
     * @param string $needle         <p>Substring to look for.</p>
5985
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
5986
     *
5987
     * @psalm-pure
5988
     *
5989
     * @return bool
5990
     *              <p>Whether or not $haystack contains $needle.</p>
5991
     */
5992
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // byte-wise search for the case-sensitive lookup, "mb_stripos()" otherwise
        $position = $case_sensitive
            ? \strpos($haystack, $needle)
            : \mb_stripos($haystack, $needle);

        return $position !== false;
    }
6003
6004
    /**
6005
     * Returns true if the string contains all $needles, false otherwise. By
6006
     * default the comparison is case-sensitive, but can be made insensitive by
6007
     * setting $case_sensitive to false.
6008
     *
6009
     * @param string $haystack       <p>The input string.</p>
6010
     * @param array  $needles        <p>SubStrings to look for.</p>
6011
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
6012
     *
6013
     * @psalm-pure
6014
     *
6015
     * @return bool
6016
     *              <p>Whether or not $haystack contains all $needles.</p>
6017
     */
6018
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Compare against the empty string instead of relying on truthiness,
            // otherwise the perfectly valid needle "0" would be rejected.
            // Non-scalar entries are treated like an empty needle.
            $needle = \is_scalar($needle) ? (string) $needle : '';
            if ($needle === '') {
                return false;
            }

            // exactly one search per needle: byte-wise when case-sensitive,
            // otherwise via "mb_stripos()"
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }
6043
6044
    /**
6045
     * Returns true if the string contains any $needles, false otherwise. By
6046
     * default the comparison is case-sensitive, but can be made insensitive by
6047
     * setting $case_sensitive to false.
6048
     *
6049
     * @param string $haystack       <p>The input string.</p>
6050
     * @param array  $needles        <p>SubStrings to look for.</p>
6051
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
6052
     *
6053
     * @psalm-pure
6054
     *
6055
     * @return bool
6056
     *              <p>Whether or not $haystack contains any of the $needles.</p>
6057
     */
6058
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Only really empty needles are skipped; a truthiness check would
            // also skip the valid needle "0". Non-scalar entries are treated
            // like an empty needle.
            $needle = \is_scalar($needle) ? (string) $needle : '';
            if ($needle === '') {
                continue;
            }

            // byte-wise search when case-sensitive, otherwise via "mb_stripos()"
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }
6088
6089
    /**
6090
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
6091
     * inserted before uppercase characters (with the exception of the first
6092
     * character of the string), and in place of spaces as well as underscores.
6093
     *
6094
     * @param string $str      <p>The input string.</p>
6095
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6096
     *
6097
     * @psalm-pure
6098
     *
6099
     * @return string
6100
     */
6101
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // same as "str_delimit()" with a dash as the delimiter
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }
6105
6106
    /**
6107
     * Returns a lowercase and trimmed string separated by the given delimiter.
6108
     * Delimiters are inserted before uppercase characters (with the exception
6109
     * of the first character of the string), and in place of spaces, dashes,
6110
     * and underscores. Alpha delimiters are not converted to lowercase.
6111
     *
6112
     * @param string      $str                           <p>The input string.</p>
6113
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
6114
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
6115
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
6116
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
6117
     *                                                   tr</p>
6118
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
6119
     *                                                   ß</p>
6120
     *
6121
     * @psalm-pure
6122
     *
6123
     * @return string
6124
     */
6125
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // 1.) put a dash in front of every upper-case letter matched by "\B(\p{Lu})"
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // 2.) lower-case the string (plain "mb_strtolower()" only for the default UTF-8 case)
        if ($lang === null && !$try_to_keep_the_string_length && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // 3.) replace every run of dashes, underscores and whitespace with the delimiter
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }
6159
6160
    /**
6161
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
6162
     *
6163
     * EXAMPLE: <code>
6164
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
6165
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
6166
     * </code>
6167
     *
6168
     * @param string $str <p>The input string.</p>
6169
     *
6170
     * @psalm-pure
6171
     *
6172
     * @return false|string
6173
     *                      <p>
6174
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
6175
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
6176
     *                      </p>
6177
     */
6178
    public static function str_detect_encoding($str)
    {
        $str = (string) $str;

        //
        // 1.) binary input (UTF-16 / UTF-32 / PDF / images / ...)
        //

        if (self::is_binary($str, true)) {
            $utf32_type = self::is_utf32($str, false);
            if ($utf32_type === 1 || $utf32_type === 2) {
                return $utf32_type === 1 ? 'UTF-32LE' : 'UTF-32BE';
            }

            $utf16_type = self::is_utf16($str, false);
            if ($utf16_type === 1 || $utf16_type === 2) {
                return $utf16_type === 1 ? 'UTF-16LE' : 'UTF-16BE';
            }

            // binary, but neither "UTF-16" nor "UTF-32"
            return false;
        }

        //
        // 2.) pure ASCII
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) valid UTF-8
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) ask "mb_detect_encoding()" (native extension only)
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            $candidate_encodings = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidate_encodings, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) last resort: round-trip the string through "iconv()" for every known encoding
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $encoding_tmp) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($encoding_tmp, $encoding_tmp . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $encoding_tmp;
            }
        }

        return false;
    }
6290
6291
    /**
6292
     * alias for "UTF8::str_ends_with()"
6293
     *
6294
     * @param string $haystack
6295
     * @param string $needle
6296
     *
6297
     * @psalm-pure
6298
     *
6299
     * @return bool
6300
     *
6301
     * @see        UTF8::str_ends_with()
6302
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
6303
     */
6304
    public static function str_ends(string $haystack, string $needle): bool
    {
        // deprecated alias: forwards to the canonical method
        $ends_with = self::str_ends_with($haystack, $needle);

        return $ends_with;
    }
6308
6309
    /**
6310
     * Check if the string ends with the given substring.
6311
     *
6312
     * EXAMPLE: <code>
6313
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6314
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
6315
     * </code>
6316
     *
6317
     * @param string $haystack <p>The string to search in.</p>
6318
     * @param string $needle   <p>The substring to search for.</p>
6319
     *
6320
     * @psalm-pure
6321
     *
6322
     * @return bool
6323
     */
6324
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        // byte-wise comparison of the haystack's tail with the needle
        return $haystack !== ''
               &&
               \substr($haystack, -\strlen($needle)) === $needle;
    }
6336
6337
    /**
6338
     * Returns true if the string ends with any of $substrings, false otherwise.
6339
     *
6340
     * - case-sensitive
6341
     *
6342
     * @param string   $str        <p>The input string.</p>
6343
     * @param string[] $substrings <p>Substrings to look for.</p>
6344
     *
6345
     * @psalm-pure
6346
     *
6347
     * @return bool
6348
     *              <p>Whether or not $str ends with $substring.</p>
6349
     */
6350
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $substring) {
            // An empty substring needs its own check: "-strlen('')" is 0, so
            // "substr()" would return the whole string instead of an empty tail.
            // Every string ends with the empty string, which matches what
            // "str_ends_with()" and "str_iends_with_any()" report.
            if (
                $substring === ''
                ||
                \substr($str, -\strlen($substring)) === $substring
            ) {
                return true;
            }
        }

        return false;
    }
6364
6365
    /**
6366
     * Ensures that the string begins with $substring. If it doesn't, it's
6367
     * prepended.
6368
     *
6369
     * @param string $str       <p>The input string.</p>
6370
     * @param string $substring <p>The substring to add if not present.</p>
6371
     *
6372
     * @psalm-pure
6373
     *
6374
     * @return string
6375
     */
6376
    public static function str_ensure_left(string $str, string $substring): string
    {
        // binary-safe prefix check; an empty substring never counts as "present"
        $has_prefix = $substring !== ''
                      &&
                      \strncmp($str, $substring, \strlen($substring)) === 0;

        return $has_prefix ? $str : $substring . $str;
    }
6388
6389
    /**
6390
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
6391
     *
6392
     * @param string $str       <p>The input string.</p>
6393
     * @param string $substring <p>The substring to add if not present.</p>
6394
     *
6395
     * @psalm-pure
6396
     *
6397
     * @return string
6398
     */
6399
    public static function str_ensure_right(string $str, string $substring): string
    {
        // nothing to append when the (non-empty) string already carries the (non-empty) suffix
        $has_suffix = $str !== ''
                      &&
                      $substring !== ''
                      &&
                      \substr($str, -\strlen($substring)) === $substring;

        if ($has_suffix) {
            return $str;
        }

        return $str . $substring;
    }
6413
6414
    /**
6415
     * Capitalizes the first word of the string, replaces underscores with
6416
     * spaces, and strips '_id'.
6417
     *
6418
     * @param string $str
6419
     *
6420
     * @psalm-pure
6421
     *
6422
     * @return string
6423
     */
6424
    public static function str_humanize($str): string
    {
        // "_id" is removed first, every remaining underscore becomes a space
        $search = ['_id', '_'];
        $replace = ['', ' '];

        $humanized = \trim(\str_replace($search, $replace, $str));

        return self::ucfirst($humanized);
    }
6440
6441
    /**
6442
     * alias for "UTF8::str_istarts_with()"
6443
     *
6444
     * @param string $haystack
6445
     * @param string $needle
6446
     *
6447
     * @psalm-pure
6448
     *
6449
     * @return bool
6450
     *
6451
     * @see        UTF8::str_istarts_with()
6452
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
6453
     */
6454
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // deprecated alias: forwards to the canonical method
        $starts_with = self::str_istarts_with($haystack, $needle);

        return $starts_with;
    }
6458
6459
    /**
6460
     * alias for "UTF8::str_iends_with()"
6461
     *
6462
     * @param string $haystack
6463
     * @param string $needle
6464
     *
6465
     * @psalm-pure
6466
     *
6467
     * @return bool
6468
     *
6469
     * @see        UTF8::str_iends_with()
6470
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
6471
     */
6472
    public static function str_iends(string $haystack, string $needle): bool
    {
        // deprecated alias: forwards to the canonical method
        $ends_with = self::str_iends_with($haystack, $needle);

        return $ends_with;
    }
6476
6477
    /**
6478
     * Check if the string ends with the given substring, case-insensitive.
6479
     *
6480
     * EXAMPLE: <code>
6481
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
6482
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
6483
     * </code>
6484
     *
6485
     * @param string $haystack <p>The string to search in.</p>
6486
     * @param string $needle   <p>The substring to search for.</p>
6487
     *
6488
     * @psalm-pure
6489
     *
6490
     * @return bool
6491
     */
6492
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // cut off as many bytes as the needle has and compare them case-insensitively
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }
6504
6505
    /**
6506
     * Returns true if the string ends with any of $substrings, false otherwise.
6507
     *
6508
     * - case-insensitive
6509
     *
6510
     * @param string   $str        <p>The input string.</p>
6511
     * @param string[] $substrings <p>Substrings to look for.</p>
6512
     *
6513
     * @psalm-pure
6514
     *
6515
     * @return bool
6516
     *              <p>Whether or not $str ends with $substring.</p>
6517
     */
6518
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // the first case-insensitive match wins
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6532
6533
    /**
6534
     * Returns the index of the first occurrence of $needle in the string,
6535
     * and false if not found. Accepts an optional offset from which to begin
6536
     * the search.
6537
     *
6538
     * @param string $str      <p>The input string.</p>
6539
     * @param string $needle   <p>Substring to look for.</p>
6540
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
6541
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
6542
     *
6543
     * @psalm-pure
6544
     *
6545
     * @return false|int
6546
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
6547
     *
6548
     * @see        UTF8::stripos()
6549
     * @deprecated <p>please use "UTF8::stripos()"</p>
6550
     */
6551
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // deprecated alias: all arguments are forwarded unchanged
        $index = self::stripos($str, $needle, $offset, $encoding);

        return $index;
    }
6564
6565
    /**
     * Finds the index of the last occurrence of $needle in $str, or false if
     * there is none. The search may start at an optional offset; a negative
     * offset counts from the last character of the string.
     *
     * This is only an alias: every argument is forwarded unchanged to UTF8::strripos().
     *
     * @param string $str      <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     * @param int    $offset   [optional] <p>Offset at which the search starts. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>index</strong> of the last occurrence, or <strong>false</strong> if nothing was found.</p>
     *
     * @see        UTF8::strripos()
     * @deprecated <p>please use "UTF8::strripos()"</p>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }
6597
6598
    /**
     * Finds the index of the first occurrence of $needle in $str, or false if
     * there is none. The search may start at an optional offset.
     *
     * This is only an alias: every argument is forwarded unchanged to UTF8::strpos().
     *
     * @param string $str      <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     * @param int    $offset   [optional] <p>Offset at which the search starts. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>index</strong> of the occurrence, or <strong>false</strong> if nothing was found.</p>
     *
     * @see        UTF8::strpos()
     * @deprecated <p>please use "UTF8::strpos()"</p>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }
6629
6630
    /**
     * Finds the index of the last occurrence of $needle in $str, or false if
     * there is none. The search may start at an optional offset; a negative
     * offset counts from the last character of the string.
     *
     * This is only an alias: every argument is forwarded unchanged to UTF8::strrpos().
     *
     * @param string $str      <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     * @param int    $offset   [optional] <p>Offset at which the search starts. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>index</strong> of the last occurrence, or <strong>false</strong> if nothing was found.</p>
     *
     * @see        UTF8::strrpos()
     * @deprecated <p>please use "UTF8::strrpos()"</p>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }
6662
6663
    /**
     * Inserts $substring into $str at the character position $index.
     *
     * If $index is greater than the character length of $str, the input is
     * returned unchanged. The 'UTF-8' encoding is handled directly via the
     * "mb_" functions; every other encoding is normalized first and then
     * handled via UTF8::strlen() / UTF8::substr().
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $char_count = (int) self::strlen($str, $encoding);
            if ($char_count < $index) {
                return $str;
            }

            $head = (string) self::substr($str, 0, $index, $encoding);
            $tail = (string) self::substr($str, $index, $char_count, $encoding);

            return $head . $substring . $tail;
        }

        $char_count = (int) \mb_strlen($str);
        if ($char_count < $index) {
            return $str;
        }

        // everything before the insertion point, then everything from it onwards
        $head = (string) \mb_substr($str, 0, $index);
        $tail = (string) \mb_substr($str, $index, $char_count);

        return $head . $substring . $tail;
    }
6704
6705
    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * Every search term is quoted and turned into a "/ui" regular expression,
     * the actual work is done by <function>preg_replace</function>. The
     * replacement is always inserted literally: "$" and "\" in it are escaped,
     * so sequences like "$1" or "\1" are not interpreted as backreferences.
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     An empty search string never matches.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements (the raw result of "preg_replace").</p>
     *
     * @template TStrIReplaceSubject
     * @psalm-param TStrIReplaceSubject $subject
     * @psalm-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        $patterns = [];
        foreach ((array) $search as $key => $needle) {
            $needle = (string) $needle;
            if ($needle === '') {
                // "^" preceded by a character is impossible -> this pattern never matches
                $patterns[$key] = '/^(?<=.)$/';
            } else {
                $patterns[$key] = '/' . \preg_quote($needle, '/') . '/ui';
            }
        }

        // "preg_replace" interprets "$n" / "\n" in the replacement as backreferences,
        // "str_ireplace" does not -> escape "$" and "\" so the text is taken literally
        if (\is_array($replacement)) {
            $literal_replacement = [];
            foreach ($replacement as $key => $item) {
                $literal_replacement[$key] = \addcslashes((string) $item, '\\$');
            }
        } else {
            $literal_replacement = \addcslashes((string) $replacement, '\\$');
        }

        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($patterns, $literal_replacement, $subject, -1, $count);

        return $subject;
    }
6762
6763
    /**
     * Replaces $search at the beginning of the string with $replacement,
     * ignoring case (also for non-ASCII characters in valid UTF-8 input).
     *
     * Special cases kept for backward compatibility:
     * - empty $str and empty $replacement: returns ''
     * - empty $str and empty $search: returns $replacement
     * - empty $search (non-empty $str): returns $str . $replacement
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement, or $str unchanged if it does not start with $search.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        // fast path: byte-wise comparison of the prefix only (ASCII case-folding),
        // this also keeps working for input that is not valid UTF-8
        $search_length = \strlen($search);
        if (\strncasecmp($str, $search, $search_length) === 0) {
            return $replacement . \substr($str, $search_length);
        }

        // Unicode-aware comparison; "preg_match" returns false for invalid UTF-8,
        // in that case the string is left untouched
        if (\preg_match('/^' . \preg_quote($search, '/') . '/ui', $str, $matches) === 1) {
            // cut off the bytes that were matched, their count can differ from $search
            return $replacement . \substr($str, \strlen($matches[0]));
        }

        return $str;
    }
6797
6798
    /**
     * Replaces $search at the ending of the string with $replacement,
     * ignoring case (also for non-ASCII characters in valid UTF-8 input).
     *
     * Special cases kept for backward compatibility:
     * - empty $str and empty $replacement: returns ''
     * - empty $str and empty $search: returns $replacement
     * - empty $search (non-empty $str): returns $str . $replacement
     *
     * A $search that is longer than $str can never match and leaves $str
     * unchanged (no out-of-range offset is passed to the string functions).
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement, or $str unchanged if it does not end with $search.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        // fast path: byte-wise comparison of the suffix only; the length guard
        // keeps the negative offset inside the string
        $search_length = \strlen($search);
        if (
            $search_length <= \strlen($str)
            &&
            \substr_compare($str, $search, -$search_length, $search_length, true) === 0
        ) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        // Unicode-aware comparison ("\z" = very end of the subject); "preg_match"
        // returns false for invalid UTF-8, in that case the string is left untouched
        if (\preg_match('/' . \preg_quote($search, '/') . '\z/ui', $str, $matches) === 1) {
            // cut off the bytes that were matched, their count can differ from $search
            return \substr($str, 0, -\strlen($matches[0])) . $replacement;
        }

        return $str;
    }
6832
6833
    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * An empty $needle always counts as a match; an empty $haystack only
     * matches an empty $needle.
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        // a match at index 0 means that the haystack begins with the needle
        return $haystack !== '' && self::stripos($haystack, $needle) === 0;
    }
6860
6861
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // the first matching candidate decides
        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }
6892
6893
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * The separator is located via UTF8::stripos() using the given $encoding,
     * so the offset and the following substring call work on the same charset.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part after the separator, or an empty string if $str or $separator is empty or the separator was not found.</p>
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for the substring below
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6932
6933
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * The separator is located via UTF8::strripos() using the given $encoding,
     * so the offset and the following substring call work on the same charset.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part after the separator, or an empty string if $str or $separator is empty or the separator was not found.</p>
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for the substring below
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
6972
6973
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * The separator is located via UTF8::stripos() using the given $encoding,
     * so the offset and the following substring call work on the same charset.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part before the separator, or an empty string if $str or $separator is empty or the separator was not found.</p>
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // search with the same encoding that is used for the substring below
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }
7004
7005
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * For 'UTF-8' the separator is located with "mb_strripos", for every other
     * encoding with UTF8::strripos().
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The part before the separator, or an empty string if $str or $separator is empty or the separator was not found.</p>
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strripos($str, $separator, 0, $encoding);

            return $position === false
                ? ''
                : (string) self::substr($str, 0, $position, $encoding);
        }

        $position = \mb_strripos($str, $separator);

        return $position === false
            ? ''
            : (string) \mb_substr($str, 0, $position);
    }
7041
7042
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Delegates to UTF8::stristr() and maps its "false" result to an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str or $needle is empty or nothing was found.</p>
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
7080
7081
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Delegates to UTF8::strrichr() and maps its "false" result to an empty string.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str or $needle is empty or nothing was found.</p>
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        return $found === false ? '' : $found;
    }
7119
7120
    /**
     * Returns the last $n characters of the string.
     *
     * An empty input or an $n that is zero or negative yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        // a negative start counts from the end of the string
        return (string) \mb_substr($str, -$n);
    }
7148
7149
    /**
     * Limit the number of characters in a string.
     *
     * A string that already fits into $length is returned unchanged. Otherwise
     * it is cut so that the kept part plus $str_add_on has $length characters.
     * If $str_add_on alone is longer than $length, nothing of $str is kept and
     * only $str_add_on is returned (the number of kept characters is never negative).
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str is empty or $length is zero or negative.</p>
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // a negative length would make "mb_substr" cut from the end of the
            // string instead, and the result would exceed the limit by far
            $keep = (int) \max(0, $length - (int) \mb_strlen($str_add_on));

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $keep) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // same clamp as above: never pass a negative length
        $keep = (int) \max(0, $length - (int) self::strlen($str_add_on));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }
7188
7189
    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * How the result is built:
     * - a string that fits into $length is returned unchanged
     * - if the character at position "$length - 1" is a space, the first
     *   "$length - 1" characters plus $str_add_on are returned
     * - otherwise the string is cut to $length characters and the last
     *   space-separated piece is dropped; if nothing is left, the first
     *   "$length - 1" characters are used instead
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if $str is empty or $length is zero or negative.</p>
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        $last_index = $length - 1;

        if ($encoding !== 'UTF-8') {
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            if (self::substr($str, $last_index, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $last_index, $encoding)) . $str_add_on;
            }

            $head = self::substr($str, 0, $length, $encoding);
            if ($head === false) {
                return '' . $str_add_on;
            }

            // limit "-1" drops the last space-separated piece
            $kept_words = \implode(' ', \explode(' ', $head, -1));
            if ($kept_words === '') {
                return ((string) self::substr($head, 0, $last_index, $encoding)) . $str_add_on;
            }

            return $kept_words . $str_add_on;
        }

        /** @noinspection UnnecessaryCastingInspection */
        if ((int) \mb_strlen($str) <= $length) {
            return $str;
        }

        if (\mb_substr($str, $last_index, 1) === ' ') {
            return ((string) \mb_substr($str, 0, $last_index)) . $str_add_on;
        }

        $head = \mb_substr($str, 0, $length);

        // limit "-1" drops the last space-separated piece
        $kept_words = \implode(' ', \explode(' ', $head, -1));
        if ($kept_words === '') {
            return ((string) \mb_substr($head, 0, $last_index)) . $str_add_on;
        }

        return $kept_words . $str_add_on;
    }
7257
7258
    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the start; the
     * comparison stops at the first difference or at the end of the shorter string.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        $prefix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            $pos = 0;
            while ($pos < $limit) {
                $char = self::substr($str1, $pos, 1, $encoding);
                if ($char === false || $char !== self::substr($str2, $pos, 1, $encoding)) {
                    break;
                }

                $prefix .= $char;
                ++$pos;
            }

            return $prefix;
        }

        $limit = (int) \min(
            \mb_strlen($str1),
            \mb_strlen($str2)
        );

        $pos = 0;
        while ($pos < $limit) {
            $char = \mb_substr($str1, $pos, 1);
            if ($char === false || $char !== \mb_substr($str2, $pos, 1)) {
                break;
            }

            $prefix .= $char;
            ++$pos;
        }

        return $prefix;
    }
7321
7322
    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * Uses dynamic programming, see
     * http://en.wikipedia.org/wiki/Longest_common_substring_problem
     *
     * The characters of both strings are extracted once up front and only two
     * rows of the table are kept, instead of re-reading both characters for
     * every table cell and holding the full table in memory.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The longest common substring, or an empty string if there is none.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // decided after the normalization, so e.g. an alias of UTF-8 also uses "mb_substr"
        $use_mbstring = $encoding === 'UTF-8';

        // extract every character only once
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $use_mbstring
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mbstring
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match so far
        $end = 0; // 1-based end position of the best match in $str1

        // $previous_row[$j] = length of the common run ending at ($i - 1, $j)
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];
            $str_char = $str_chars[$i - 1];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;

                    // strictly greater -> on ties the first match wins
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $run = 0;
                }

                $current_row[$j] = $run;
            }

            $previous_row = $current_row;
        }

        if ($use_mbstring) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }
7412
7413
    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * Both strings are compared character by character from the end; the
     * comparison stops at the first difference or at the start of the shorter string.
     *
     * @param string $str1     <p>The input string.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string if one of the inputs is empty or there is no common suffix.</p>
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $limit = (int) \min(
                self::strlen($str1, $encoding),
                self::strlen($str2, $encoding)
            );

            $back = 1;
            while ($back <= $limit) {
                $char = self::substr($str1, -$back, 1, $encoding);
                if ($char === false || $char !== self::substr($str2, -$back, 1, $encoding)) {
                    break;
                }

                $suffix = $char . $suffix;
                ++$back;
            }

            return $suffix;
        }

        $limit = (int) \min(
            \mb_strlen($str1, $encoding),
            \mb_strlen($str2, $encoding)
        );

        // $back = distance from the end of the string, starting with the last character
        $back = 1;
        while ($back <= $limit) {
            $char = \mb_substr($str1, -$back, 1);
            if ($char === false || $char !== \mb_substr($str2, -$back, 1)) {
                break;
            }

            $suffix = $char . $suffix;
            ++$back;
        }

        return $suffix;
    }
7479
7480
    /**
     * Checks whether $str matches the given regular expression.
     *
     * The pattern is supplied WITHOUT delimiters and modifiers; it is wrapped
     * in "/" delimiters and evaluated with the "u" (UTF-8) modifier.
     *
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern (without delimiters) to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>True if the pattern matches, false otherwise (including on a regex error).</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        $regex = '/' . $pattern . '/u';

        // preg_match() yields 1 on a match, 0 on no match and false on error.
        return \preg_match($regex, $str) === 1;
    }
7495
7496
    /**
     * Tells whether a character exists at the given index. Negative offsets
     * count from the last character of the string (-1 is the last character).
     * Implements part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Negative offsets are valid as long as they do not reach past the first character.
        if ($offset < 0) {
            return \abs($offset) <= $char_count;
        }

        return $offset < $char_count;
    }
7521
7522
    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // The length must be measured in the same encoding that is used to
        // fetch the character below, otherwise the bounds check and the
        // lookup disagree for non-default encodings.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }
7554
7555
    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string (in characters).</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right', 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string. The input is returned unchanged if
     *                $pad_length is 0 or smaller than the input length, or if
     *                $pad_string is empty.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Translate the string aliases into the native STR_PAD_* constants.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // Select the length / prefix primitives once, so the padding logic
        // below exists only a single time for both encoding paths.
        if ($encoding === 'UTF-8') {
            $char_count = static function (string $text): int {
                return (int) \mb_strlen($text, 'UTF-8');
            };
            $first_chars = static function (string $text, int $count): string {
                return (string) \mb_substr($text, 0, $count, 'UTF-8');
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $char_count = static function (string $text) use ($encoding): int {
                return (int) self::strlen($text, $encoding);
            };
            $first_chars = static function (string $text, int $count) use ($encoding): string {
                return (string) self::substr($text, 0, $count, $encoding);
            };
        }

        $str_length = $char_count($str);
        if ($pad_length < $str_length) {
            return $str;
        }

        $diff = $pad_length - $str_length;

        // Guard against a zero length (e.g. a failed length lookup) to avoid a division by zero.
        $ps_length = \max(1, $char_count($pad_string));

        // Builds exactly $count characters of padding: repeat the pad string
        // just often enough, then cut the surplus.
        $padding = static function (int $count) use ($pad_string, $ps_length, $first_chars): string {
            if ($count <= 0) {
                return '';
            }

            return $first_chars(
                \str_repeat($pad_string, (int) \ceil($count / $ps_length)),
                $count
            );
        };

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                return $padding($diff) . $str;

            case \STR_PAD_BOTH:
                // On an odd difference the extra character goes to the right side.
                $left_length = \intdiv($diff, 2);

                return $padding($left_length) . $str . $padding($diff - $left_length);

            case \STR_PAD_RIGHT:
            default:
                return $str . $padding($diff);
        }
    }
7722
7723
    /**
     * Pads both sides of the string until it reaches the given length.
     * Shortcut for "UTF8::str_pad()" called with STR_PAD_BOTH.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);

        return $padded;
    }
7751
7752
    /**
     * Pads the beginning of the string until it reaches the given length.
     * Shortcut for "UTF8::str_pad()" called with STR_PAD_LEFT.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);

        return $padded;
    }
7780
7781
    /**
     * Pads the end of the string until it reaches the given length.
     * Shortcut for "UTF8::str_pad()" called with STR_PAD_RIGHT.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        $padded = self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);

        return $padded;
    }
7809
7810
    /**
     * Repeat a string.
     *
     * The input is passed through "UTF8::filter()" first and the filtered
     * result is what gets repeated.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>The string to be repeated.</p>
     * @param int    $multiplier <p>
     *                           Number of times the input string should be repeated.
     *                           </p>
     *                           <p>
     *                           The multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        return \str_repeat(self::filter($str), $multiplier);
    }
7839
7840
    /**
     * INFO: This is only a thin wrapper around the native "str_replace()",
     *       which is already safe to use with UTF-8 input.
     *
     * Replaces all occurrences of the search string with the replacement string.
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The needle(s) to look for. An array designates multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value(s) for the found needles.
     *                                 An array designates multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The haystack: a string or an array of strings to search and replace on.
     *                                 </p>
     *                                 <p>
     *                                 If subject is an array, the search and replace is performed on
     *                                 every entry and the return value is an array as well.
     *                                 </p>
     * @param int|null $count          [optional] <p>
     *                                 If passed, receives the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @psalm-param TStrReplaceSubject $subject
     * @psalm-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @psalm-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }
7899
7900
    /**
     * Replaces $search with $replacement if, and only if, $str starts with $search.
     *
     * The comparison is byte-wise and case-sensitive.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle never "matches": the replacement is simply appended.
        // This also covers every combination involving an empty $str.
        // NOTE(review): appending (instead of prepending) for an empty $search
        // looks surprising for a "beginning" helper - kept as-is, confirm intent.
        if ($search === '') {
            return $str . $replacement;
        }

        $needle_bytes = \strlen($search);

        // Only the leading bytes need to be compared to detect a prefix.
        if (\strncmp($str, $search, $needle_bytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $needle_bytes);
    }
7937
7938
    /**
     * Replaces $search with $replacement if, and only if, $str ends with $search.
     *
     * The comparison is byte-wise and case-sensitive.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty needle never "matches": the replacement is simply appended.
        // This also covers every combination involving an empty $str.
        if ($search === '') {
            return $str . $replacement;
        }

        // Byte offset at which $search would have to start to be a suffix.
        // It is negative when $search is longer than $str. That case must be
        // rejected up-front instead of being handed to strpos() as an offset,
        // which fails for offsets outside of the haystack.
        $suffix_start = \strlen($str) - \strlen($search);

        if ($suffix_start >= 0 && \substr($str, $suffix_start) === $search) {
            return \substr($str, 0, $suffix_start) . $replacement;
        }

        return $str;
    }
7975
7976
    /**
     * Replaces only the first occurrence of "$search" with "$replace".
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement for the first hit.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The subject with the first hit replaced, or the unchanged
     *                subject if "$search" was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $first_hit = self::strpos($subject, $search);

        // nothing found -> nothing to replace
        if ($first_hit === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $first_hit, $search_length);
    }
8010
8011
    /**
     * Replaces only the last occurrence of "$search" with "$replace".
     *
     * @param string $search  <p>The term to look for.</p>
     * @param string $replace <p>The replacement for the last hit.</p>
     * @param string $subject <p>The string to search in.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The subject with the last hit replaced, or the unchanged
     *                subject if "$search" was not found.</p>
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $last_hit = self::strrpos($subject, $search);

        // nothing found -> nothing to replace
        if ($last_hit === false) {
            return $subject;
        }

        $search_length = (int) self::strlen($search);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace($subject, $replace, $last_hit, $search_length);
    }
8044
8045
    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // Without this guard an empty string would build the index list
        // range(0, -1), i.e. [0, -1], and only work by accident.
        if ($str === '') {
            return '';
        }

        // init
        $shuffled_str = '';

        if ($encoding === 'UTF-8') {
            $indexes = \range(0, (int) \mb_strlen($str, $encoding) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($indexes);

            // Plain by-value iteration: the indexes are only read, so no
            // reference (and no dangling reference afterwards) is needed.
            foreach ($indexes as $index) {
                $tmp_sub_str = \mb_substr($str, $index, 1, $encoding);
                if ($tmp_sub_str !== false) {
                    $shuffled_str .= $tmp_sub_str;
                }
            }

            return $shuffled_str;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $indexes = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        foreach ($indexes as $index) {
            $tmp_sub_str = self::substr($str, $index, 1, $encoding);
            if ($tmp_sub_str !== false) {
                $shuffled_str .= $tmp_sub_str;
            }
        }

        return $shuffled_str;
    }
8094
8095
    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. Negative values for $start or $end are computed
     * from the end of the string.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring; an empty string if the
     *                      resolved $end does not lie behind the resolved $start.</p>
     *                      <p>Whatever the underlying substr implementation returns
     *                      (possibly <b>FALSE</b>) if <i>str</i> is shorter than
     *                      <i>start</i> characters long.</p>
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $str_length = (int) \mb_strlen($str, $encoding);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $str_length = (int) self::strlen($str, $encoding);
        }

        if ($end === null) {
            // No end given: take everything from $start on (a negative
            // $start is resolved by the substr implementation itself).
            $offset = $start;
            $length = $str_length;
        } else {
            // Resolve both boundaries to absolute, non-negative indexes first.
            // Mixing a relative $start with an absolute $end (or vice versa)
            // yields a wrong length, and a negative length would be
            // interpreted by substr as "stop that many characters before
            // the end" instead of "empty".
            $offset = $start < 0 ? \max(0, $str_length + $start) : $start;
            $stop = $end < 0 ? $str_length + $end : $end;

            if ($stop <= $offset) {
                return '';
            }

            $length = $stop - $offset;
        }

        if ($is_utf8) {
            return \mb_substr($str, $offset, $length, $encoding);
        }

        return self::substr($str, $offset, $length, $encoding);
    }
8146
8147
    /**
     * Convert a string to e.g.: "snake_case"
     *
     * Dashes and whitespace become "_", every number and upper-case letter
     * starts a new "_"-separated part (upper-case letters are lower-cased),
     * and repeated or surrounding "_" are removed.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = (string) \preg_replace_callback(
            // Numbers and upper-case letters only. Inside a character class a
            // "|" is a literal pipe (not an alternation), so it must not be
            // listed here, otherwise pipes would be treated like capitals.
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // ASCII digits are separated on both sides ...
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                // ... everything else is lower-cased and prefixed with "_".
                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }
8216
8217
    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
     * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string of sorted characters.</p>
     */
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        if ($unique) {
            // Flipping twice drops duplicate values, since array keys are unique.
            $flipped = \array_flip($code_points);
            $code_points = \array_flip($flipped);
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }
8247
8248
    /**
     * Convert each string of an array to an array of Unicode characters.
     *
     * EXAMPLE: <code>
     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     * </code>
     *
     * @param int[]|string[] $input                   <p>The string[] or int[] to split into array.</p>
     * @param int            $length                  [optional] <p>Max character length of each array
     *                                                element.</p>
     * @param bool           $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                                string.</p>
     * @param bool           $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                                "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[][]
     *                    <p>An array containing chunks of the input; the keys of
     *                    $input are preserved.</p>
     */
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Overwrite every entry in place with its own list of chunks.
        foreach ($input as $key => $value) {
            $input[$key] = self::str_split(
                $value,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        /** @var string[][] $input */
        return $input;
    }
8286
8287
    /**
     * Convert a string to an array of unicode characters.
     *
     * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code>
     *
     * NOTE: for backward compatibility an array is still accepted as $input; in
     * that case the call is forwarded to self::str_split_array() and the result
     * is an array of arrays (string[][]).
     *
     * @param int|string $input                   <p>The string or int to split into array.</p>
     * @param int        $length                  [optional] <p>Max character length of each array
     *                                            element.</p>
     * @param bool       $clean_utf8              [optional] <p>Remove non UTF-8 chars from the
     *                                            string.</p>
     * @param bool       $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use
     *                                            "mb_substr"</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array containing chunks of chars from the input.</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     * @noinspection OffsetOperationsInspection
     */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback
        /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
        /** @var int|int[]|string|string[] $input */
        $input = $input;
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array(
                $input,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        // init
        $input = (string) $input;

        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if (
            $try_to_use_mb_functions
            &&
            self::$SUPPORT['mbstring'] === true
        ) {
            if (Bootup::is_php('7.4')) {
                // native implementation, already honours $length
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $return = \mb_str_split($input, $length);
                if ($return !== false) {
                    return $return;
                }
            }

            $i_max = \mb_strlen($input);
            if ($i_max <= 127) {
                // short strings: one mb_substr() call per character
                $ret = [];
                for ($i = 0; $i < $i_max; ++$i) {
                    $ret[] = \mb_substr($input, $i, 1);
                }
            } else {
                // longer strings: let PCRE split into single characters
                $return_array = [];
                \preg_match_all('/./us', $input, $return_array);
                $ret = $return_array[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $return_array = [];
            \preg_match_all('/./us', $input, $return_array);
            $ret = $return_array[0] ?? [];
        } else {

            // fallback: decode the UTF-8 byte sequences by hand; bytes that do
            // not form a complete 1-4 byte sequence are skipped

            $ret = [];
            $len = \strlen($input);

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $len; ++$i) {
                if (($input[$i] & "\x80") === "\x00") {
                    // single byte (ASCII)
                    $ret[] = $input[$i];
                } elseif (
                    isset($input[$i + 1])
                    &&
                    ($input[$i] & "\xE0") === "\xC0"
                ) {
                    // lead byte of a two-byte sequence
                    if (($input[$i + 1] & "\xC0") === "\x80") {
                        $ret[] = $input[$i] . $input[$i + 1];

                        ++$i;
                    }
                } elseif (
                    isset($input[$i + 2])
                    &&
                    ($input[$i] & "\xF0") === "\xE0"
                ) {
                    // lead byte of a three-byte sequence
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2];

                        $i += 2;
                    }
                } elseif (
                    isset($input[$i + 3])
                    &&
                    ($input[$i] & "\xF8") === "\xF0"
                ) {
                    // lead byte of a four-byte sequence
                    if (
                        ($input[$i + 1] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 2] & "\xC0") === "\x80"
                        &&
                        ($input[$i + 3] & "\xC0") === "\x80"
                    ) {
                        $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];

                        $i += 3;
                    }
                }
            }
        }

        if ($length > 1) {
            $ret = \array_chunk($ret, $length);

            // the callback takes its argument by value: array_map() passes
            // values, and a by-reference parameter triggers a warning on PHP 8
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                $ret
            );
        }

        if (isset($ret[0]) && $ret[0] === '') {
            return [];
        }

        return $ret;
    }
8447
8448
    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern <p>The regex with which to split the string.</p>
     * @param int    $limit   [optional] <p>Maximum number of results to return. Default: -1 === no limit</p>
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array of strings; empty if $limit is 0 or the split fails.</p>
     */
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // mb_split() reports failure with false; answer with an empty
            // result, exactly like the preg_split() fallback below
            if ($parts === false) {
                return [];
            }

            // a positive limit keeps only the first $limit pieces
            if ($limit > 0) {
                return \array_slice($parts, 0, $limit);
            }

            return $parts;
        }

        if ($limit > 0) {
            // ask for one extra piece: it collects the unsplit remainder and
            // is dropped again below
            ++$limit;
        } else {
            $limit = -1;
        }

        $array = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $limit);

        if ($array === false) {
            return [];
        }

        if ($limit > 0 && \count($array) === $limit) {
            \array_pop($array);
        }

        return $array;
    }
8512
8513
    /**
     * Check if the string starts with the given substring.
     *
     * The comparison is byte-wise and case-sensitive. An empty $needle always
     * matches, even for an empty $haystack.
     *
     * EXAMPLE: <code>
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // compare the prefix only; strpos() would scan the whole haystack
        // before it can report that the needle is not at offset 0
        return \strncmp($haystack, $needle, \strlen($needle)) === 0;
    }
8540
8541
    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     * - an empty $str or an empty $substrings list always yields false
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: nothing is modified, so a reference is not needed
        foreach ($substrings as $substring) {
            if (self::str_starts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }
8572
8573
    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * Returns an empty string if $str or $separator is empty, or if the
     * separator does not occur in $str.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // fast path: plain mbstring calls
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // go through self::substr() like the sibling "*_separator" methods,
        // instead of handing the caller-supplied encoding to mb_substr() directly
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }
8614
8615
    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // other encodings go through the encoding-aware helpers of this class
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            $start = $position + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $start, null, $encoding);
        }

        // UTF-8: plain mbstring calls
        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        $start = $position + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $start);
    }
8659
8660
    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            // other encodings go through the encoding-aware helpers of this class
            $position = self::strpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            return (string) self::substr($str, 0, $position, $encoding);
        }

        // UTF-8: plain mbstring calls
        $position = \mb_strpos($str, $separator);
        if ($position === false) {
            return '';
        }

        return (string) \mb_substr($str, 0, $position);
    }
8705
8706
    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * An empty string is returned when $str or $separator is empty, or when
     * the separator cannot be found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string
    {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $position = self::strrpos($str, $separator, 0, $encoding);
            if ($position === false) {
                return '';
            }

            // the encoding name is normalized before the final cut
            $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $position, $normalized_encoding);
        }

        // UTF-8: plain mbstring calls
        $position = \mb_strrpos($str, $separator);
        if ($position === false) {
            return '';
        }

        return (string) \mb_substr($str, 0, $position);
    }
8750
8751
    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * Without $before_needle the result starts with the needle itself; with
     * $before_needle it is the part in front of the needle. An empty string
     * is returned for empty arguments or when the needle is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $found = self::strstr($str, $needle, $before_needle, $encoding);
        } else {
            // the third argument of mb_strstr() defaults to false, so passing
            // the flag through covers both variants
            $found = \mb_strstr($str, $needle, $before_needle);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
8797
8798
    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * Without $before_needle the result starts with the needle itself; with
     * $before_needle it is the part in front of the needle. An empty string
     * is returned for empty arguments or when the needle is not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $found = self::strrchr($str, $needle, $before_needle, $encoding);
        } else {
            // the third argument of mb_strrchr() defaults to false, so passing
            // the flag through covers both variants
            $found = \mb_strrchr($str, $needle, $before_needle);
        }

        if ($found === false) {
            return '';
        }

        return $found;
    }
8844
8845
    /**
     * Wraps $str in the given substring.
     *
     * @param string $str
     * @param string $substring <p>The substring to add to both sides.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with the substring both prepended and appended.</p>
     */
    public static function str_surround(string $str, string $substring): string
    {
        return \implode('', [$substring, $str, $substring]);
    }
8860
8861
    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * A "word" is a run of characters that are neither whitespace nor listed
     * in $word_define_chars. Words found in $ignore (exact, case-sensitive
     * match) are returned unchanged.
     *
     * @param string              $str
     * @param array|string[]|null $ignore                        [optional] <p>An array of words not to capitalize or
     *                                                           null. Default: null</p>
     * @param string              $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool                $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the
     *                                                           string.</p>
     * @param string|null         $lang                          [optional] <p>Set the language for special cases: az,
     *                                                           el, lt, tr</p>
     * @param bool                $try_to_keep_the_string_length [optional] <p>true === try to keep the string length:
     *                                                           e.g. ẞ -> ß</p>
     * @param bool                $use_trim_first                [optional] <p>true === trim the input string,
     *                                                           first</p>
     * @param string|null         $word_define_chars             [optional] <p>An string of chars that will be used as
     *                                                           whitespace separator === words.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the plain mb_* calls are only used when no language-specific or
        // length-preserving case mapping was requested
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // compare explicitly against null / '': a truthiness check would
        // silently discard the separator string "0"
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }
8958
8959
    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * The work is done by a chain of regex passes: one main substitution that
     * classifies every word, followed by passes that re-capitalize "small
     * words" at the start/end of the title and inside hyphenated compounds.
     * The entries of $ignore are appended to the small-word alternation of
     * those patterns as-is.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array  $ignore   <p>An array of words not to capitalize.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The titleized string.</p>
     */
    public static function str_titleize_for_humans(
        string $str,
        array $ignore = [],
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // regex fragments for words that stay lower-case inside a title
        $small_words = [
            '(?<!q&)a',
            'an',
            'and',
            'as',
            'at(?!&t)',
            'but',
            'by',
            'en',
            'for',
            'if',
            'in',
            'of',
            'on',
            'or',
            'the',
            'to',
            'v[.]?',
            'via',
            'vs[.]?',
        ];

        if ($ignore !== []) {
            $small_words = \array_merge($small_words, $ignore);
        }

        $small_words_rx = \implode('|', $small_words);
        $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?';

        $str = \trim($str);

        // input without any lower-case letter is lower-cased first
        if (!self::has_lowercase($str)) {
            $str = self::strtolower($str, $encoding);
        }

        // the main substitutions
        /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */
        $str = (string) \preg_replace_callback(
            '~\\b (_*) (?:                                                           # 1. Leading underscore and
                        ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ |                # 2. file path or 
                          [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' )  #    URL, domain, or email
                        |
                        ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' )           # 3. or small word (case-insensitive)
                        |
                        ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 4. or word w/o internal caps
                        |
                        ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' )     # 5. or some other word
                      ) (_*) \\b                                                          # 6. With trailing underscore
                    ~ux',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                // preserve leading underscore
                $str = $matches[1];
                if ($matches[2]) {
                    // preserve URLs, domains, emails and file paths
                    $str .= $matches[2];
                } elseif ($matches[3]) {
                    // lower-case small words
                    $str .= self::strtolower($matches[3], $encoding);
                } elseif ($matches[4]) {
                    // capitalize word w/o internal caps
                    $str .= self::ucfirst($matches[4], $encoding);
                } else {
                    // preserve other kinds of word (iPhone)
                    $str .= $matches[5];
                }
                // preserve trailing underscore
                $str .= $matches[6];

                return $str;
            },
            $str
        );

        // Exceptions for small words: capitalize at start of title...
        $str = (string) \preg_replace_callback(
            '~(  \\A [[:punct:]]*            # start of title...
                      |  [:.;?!][ ]+                # or of subsentence...
                      |  [ ][\'"“‘(\[][ ]* )        # or of inserted subphrase...
                      ( ' . $small_words_rx . ' ) \\b # ...followed by small word
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        // ...and end of title
        $str = (string) \preg_replace_callback(
            '~\\b ( ' . $small_words_rx . ' ) # small word...
                      (?= [[:punct:]]* \Z          # ...at the end of the title...
                      |   [\'"’”)\]] [ ] )         # ...or of an inserted subphrase?
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // Exceptions for small words in hyphenated compound words.
        // e.g. "in-flight" -> In-Flight
        $str = (string) \preg_replace_callback(
            '~\\b
                        (?<! -)                   # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight)
                        ( ' . $small_words_rx . ' )
                        (?= -[[:alpha:]]+)        # lookahead for "-someword"
                       ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return self::ucfirst($matches[1], $encoding);
            },
            $str
        );

        // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point)
        $str = (string) \preg_replace_callback(
            '~\\b
                      (?<!…)                    # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in)
                      ( [[:alpha:]]+- )         # $1 = first word and hyphen, should already be properly capped
                      ( ' . $small_words_rx . ' ) # ...followed by small word
                      (?!	- )                 # Negative lookahead for another -
                     ~uxi',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                return $matches[1] . self::ucfirst($matches[2], $encoding);
            },
            $str
        );

        return $str;
    }
9150
9151
    /**
     * Get a binary representation of a specific string.
     *
     * EXAMPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code>
     *
     * INFO: The bytes of the string are read as one big number, so leading zero bits
     *       are not part of the result (e.g. 'a' => '1100001') and an empty string
     *       results in '0'.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>false on error</p>
     */
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Convert nibble by nibble instead of via "base_convert()": that function works
        // with a float internally and silently loses precision for anything longer than
        // a few bytes, which produced wrong bit strings for longer inputs.
        /** @noinspection OffsetOperationsInspection */
        $hex = (string) $value[1];
        $hex_length = \strlen($hex);

        $bits = '';
        for ($i = 0; $i < $hex_length; ++$i) {
            $bits .= \str_pad(\decbin((int) \hexdec($hex[$i])), 4, '0', \STR_PAD_LEFT);
        }

        // Keep the established output format: no leading zeros, "0" for empty / all-zero input.
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }
9174
9175
    /**
     * Split a string into its lines.
     *
     * A line break is "\r\n", "\n\r", "\r" or "\n". Every single line break separates
     * two lines, so blank lines show up as empty strings unless they are filtered out
     * via $remove_empty_values / $remove_short_values.
     *
     * @param string   $str
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        if ($str === '') {
            return $remove_empty_values ? [] : [''];
        }

        // The two-character breaks must come first, so that they are consumed as ONE
        // separator. The former "[\r\n]{1,2}" pattern also treated "\n\n" and "\r\r"
        // as a single break and therefore silently dropped blank lines.
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $return = \mb_split("\r\n|\n\r|\r|\n", $str);
        } else {
            $return = \preg_split("/\r\n|\n\r|\r|\n/u", $str);
        }

        if ($return === false) {
            return $remove_empty_values ? [] : [''];
        }

        // nothing to filter -> hand back the raw split result
        if (
            $remove_short_values === null
            &&
            !$remove_empty_values
        ) {
            return $return;
        }

        return self::reduce_string_array(
            $return,
            $remove_empty_values,
            $remove_short_values
        );
    }
9215
9216
    /**
     * Convert a string into an array of words.
     *
     * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result alternates
     * between "non-word" parts (even indexes) and "word" parts (odd indexes).
     *
     * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code>
     *
     * @param string   $str
     * @param string   $char_list           <p>Additional chars for the definition of "words".</p>
     * @param bool     $remove_empty_values <p>Remove empty values.</p>
     * @param int|null $remove_short_values <p>The min. string length or null to disable</p>
     *
     * @psalm-pure
     *
     * @return string[]
     */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        // result for "nothing to split" (empty input or a failing regex)
        $fallback = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $fallback;
        }

        $word_chars = self::rxClass($char_list, '\pL');

        $parts = \preg_split("/({$word_chars}+(?:[\p{Pd}’']{$word_chars}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            return $fallback;
        }

        if ($remove_empty_values || $remove_short_values !== null) {
            // filter first, then make sure every remaining value is a string
            return \array_map(
                static function ($part): string {
                    return (string) $part;
                },
                self::reduce_string_array($parts, $remove_empty_values, $remove_short_values)
            );
        }

        return $parts;
    }
9267
9268
    /**
     * alias for "UTF8::to_ascii()"
     *
     * All three arguments are forwarded unchanged.
     *
     * @param string $str     <p>Passed on as the first argument.</p>
     * @param string $unknown <p>Passed on as the second argument. Default: '?'</p>
     * @param bool   $strict  <p>Passed on as the third argument. Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Whatever "UTF8::to_ascii()" returns.</p>
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $unknown, $strict);

        return $ascii;
    }
9289
9290
    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * INFO: If $substring alone is already as long as (or longer than) $length,
     *       nothing of $str is kept and only $substring is returned.
     *
     * @param string $str
     * @param int    $length    <p>Desired length of the truncated string.</p>
     * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // short enough -> nothing to truncate, nothing to append
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                // Reserve room for the substring, but never go below zero: a negative
                // length would make "mb_substr()" cut from the END of the string and
                // the result would be far longer than requested.
                $length = \max(0, $length - (int) \mb_strlen($substring));
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length) . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            // same clamping as in the UTF-8 fast path above
            $length = \max(0, $length - (int) self::strlen($substring, $encoding));
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }
9350
9351
    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * INFO: For an empty $str, a $length <= 0, or when $substring leaves no room
     *       for any character of $str, only $substring is returned.
     *
     * @param string $str
     * @param int    $length                                 <p>Desired length of the truncated string.</p>
     * @param string $substring                              [optional] <p>The substring to append if it can fit.
     *                                                       Default: ''</p>
     * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
     * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>If true, a cut inside the very first
     *                                                       word keeps the cut part instead of dropping it.
     *                                                       Default: false</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after truncating.</p>
     */
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            if ($length >= (int) self::strlen($str, $encoding)) {
                return $str;
            }

            // reserve room for the substring that gets appended at the end
            $length -= (int) self::strlen($substring, $encoding);
            if ($length <= 0) {
                return $substring;
            }

            $cut = self::substr($str, 0, $length, $encoding);
            if ($cut === false) {
                return '';
            }

            // a space exactly at position $length means that the cut did not hit a word
            $next_space = self::strpos($str, ' ', $length - 1, $encoding);
            if ($next_space !== $length) {
                // the last word was cut -> go back to the last space inside the cut part
                $last_space = self::strrpos($cut, ' ', 0, $encoding);
                $drop_single_word = $next_space !== false && !$ignore_do_not_split_words_for_one_word;

                if ($last_space !== false || $drop_single_word) {
                    $cut = (string) self::substr($cut, 0, (int) $last_space, $encoding);
                }
            }

            return $cut . $substring;
        }

        //
        // fast path for UTF-8 via "mb_*" functions
        //

        if ($length >= (int) \mb_strlen($str)) {
            return $str;
        }

        // reserve room for the substring that gets appended at the end
        $length -= (int) \mb_strlen($substring);
        if ($length <= 0) {
            return $substring;
        }

        /** @var false|string $cut - needed for PhpStan (stubs error) */
        $cut = \mb_substr($str, 0, $length);
        if ($cut === false) {
            return '';
        }

        // a space exactly at position $length means that the cut did not hit a word
        $next_space = \mb_strpos($str, ' ', $length - 1);
        if ($next_space !== $length) {
            // the last word was cut -> go back to the last space inside the cut part
            $last_space = \mb_strrpos($cut, ' ', 0);
            $drop_single_word = $next_space !== false && !$ignore_do_not_split_words_for_one_word;

            if ($last_space !== false || $drop_single_word) {
                $cut = (string) \mb_substr($cut, 0, (int) $last_space);
            }
        }

        return $cut . $substring;
    }
9457
9458
    /**
     * Shortcut for "UTF8::str_delimit()" with "_" as the delimiter.
     *
     * Documented result: a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes. (The actual work is done by "UTF8::str_delimit()".)
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The underscored string.</p>
     */
    public static function str_underscored(string $str): string
    {
        $delimiter = '_';

        return self::str_delimit($str, $delimiter);
    }
9475
9476
    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * INFO: The string is first passed to "UTF8::str_camelize()" (with $str and $encoding only),
     *       the result is then passed to "UTF8::ucfirst()" together with all other options.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in UpperCamelCase.</p>
     */
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }
9503
9504
    /**
     * alias for "UTF8::ucfirst()"
     *
     * All arguments are forwarded unchanged and in the same order.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Whatever "UTF8::ucfirst()" returns.</p>
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }
9535
9536
    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES: <code>
     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     * </code>
     *
     * @param string $str       <p>The input string.</p>
     * @param int    $format    [optional] <p>
     *                          <strong>0</strong> => return a number of words (default)<br>
     *                          <strong>1</strong> => return an array of words<br>
     *                          <strong>2</strong> => return an array of words with word-offset as key
     *                          </p>
     *                          <p>Every other value is handled like <strong>0</strong>.</p>
     * @param string $char_list [optional] <p>Additional chars that contains to words and do not start a new word.</p>
     *
     * @psalm-pure
     *
     * @return int|string[]
     *                      <p>The number of words in the string.</p>
     */
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // "str_to_words()" returns: [non-word, word, non-word, word, ..., non-word]
        // -> the words are stored at the odd indexes
        $parts = self::str_to_words($str, $char_list);
        $part_count = \count($parts);

        if ($format !== 1 && $format !== 2) {
            return (int) (($part_count - 1) / 2);
        }

        $words = [];

        if ($format === 1) {
            for ($i = 1; $i < $part_count; $i += 2) {
                $words[] = $parts[$i];
            }

            return $words;
        }

        // format 2: the key is the character offset of the word, starting behind the leading non-word part
        $offset = (int) self::strlen($parts[0]);
        for ($i = 1; $i < $part_count; $i += 2) {
            $words[$offset] = $parts[$i];
            $offset += (int) self::strlen($parts[$i]) + (int) self::strlen($parts[$i + 1]);
        }

        return $words;
    }
9593
9594
    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp(): both strings are passed through
     *       "UTF8::strtocasefold()" and the results are compared via "UTF8::strcmp()".
     *
     * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1     <p>The first string.</p>
     * @param string $str2     <p>The second string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        $folded_str1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded_str2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded_str1, $folded_str2);
    }
9636
9637
    /**
     * alias for "UTF8::strstr()"
     *
     * All arguments are forwarded unchanged and in the same order.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>Whatever "UTF8::strstr()" returns.</p>
     *
     * @see        UTF8::strstr()
     * @deprecated <p>please use "UTF8::strstr()"</p>
     */
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9668
9669
    /**
     * Case-sensitive string comparison.
     *
     * INFO: Both strings are normalized (NFD) before they are compared byte by byte,
     *       so composed and decomposed forms of the same character are equal.
     *       A string that can't be normalized (e.g. invalid UTF-8) is compared as it is.
     *
     * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
     *
     * @param string $str1 <p>The first string.</p>
     * @param string $str2 <p>The second string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
     *             <strong>0</strong> if they are equal
     */
    public static function strcmp(string $str1, string $str2): int
    {
        // identical bytes -> no need to normalize anything
        if ($str1 === $str2) {
            return 0;
        }

        // "Normalizer::normalize()" does not return a string on failure; with
        // "strict_types" that value would make "\strcmp()" throw a TypeError,
        // so fall back to the original string in that case.
        $normalized_str1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        if (!\is_string($normalized_str1)) {
            $normalized_str1 = $str1;
        }

        $normalized_str2 = \Normalizer::normalize($str2, \Normalizer::NFD);
        if (!\is_string($normalized_str2)) {
            $normalized_str2 = $str2;
        }

        return \strcmp($normalized_str1, $normalized_str2);
    }
9695
9696
    /**
     * Find length of initial segment not matching mask.
     *
     * INFO: The result is the number of characters (in the given encoding) in front of the
     *       first character from $char_list, or the length of the whole (sub-)string
     *       if none of these characters was found.
     *
     * @param string   $str
     * @param string   $char_list <p>The characters to look for; an empty list returns the length of $str.</p>
     * @param int      $offset    <p>Start of the part of $str that is inspected.</p>
     * @param int|null $length    <p>Length of the part of $str that is inspected, null === up to the end.</p>
     * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // only look at the requested part of the string
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str_part = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str_part = \mb_substr($str, $offset);
            } else {
                $str_part = \mb_substr($str, $offset, $length);
            }

            if ($str_part === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $str_part;
        }

        if ($str === '') {
            return 0;
        }

        // capture (non-greedy) everything in front of the first char from the list
        $matches = [];
        if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $matches)) {
            // no char from the list found -> the whole string is the "initial segment"
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($matches[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }
9759
9760
    /**
     * alias for "UTF8::stristr()"
     *
     * All arguments are forwarded unchanged and in the same order.
     *
     * @param string $haystack
     * @param string $needle
     * @param bool   $before_needle
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>Whatever "UTF8::stristr()" returns.</p>
     *
     * @see        UTF8::stristr()
     * @deprecated <p>please use "UTF8::stristr()"</p>
     */
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        return self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);
    }
9791
9792
    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * INFO: Every value is cast to int and written as a numeric HTML entity ("&#...;"),
     *       the entities are then decoded via "UTF8::html_entity_decode()".
     *
     * NOTE(review): because of the int cast, a non-numeric hexadecimal string (e.g. 'F6')
     *               ends up as code point 0 - confirm whether hex input is really supported.
     *
     * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
     *
     * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
     *
     * @psalm-param int[]|numeric-string[]|int|numeric-string $intOrHex
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A UTF-8 encoded string.</p>
     */
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // a single code point is handled like a list with one entry
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        $entities = \array_map(
            static function ($code_point): string {
                return '&#' . (int) $code_point . ';';
            },
            $code_points
        );

        return self::html_entity_decode(\implode('', $entities), \ENT_QUOTES | \ENT_HTML5);
    }
9825
9826
    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * INFO: The string is checked against every key of the internal BOM list.
     *
     * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if the string has BOM at the start,<br>
     *              <strong>false</strong> otherwise
     */
    public static function string_has_bom(string $str): bool
    {
        // Only the keys (the BOM strings) are needed, so there is no reason to iterate
        // the static list by reference or to fetch the unused values.
        foreach (\array_keys(self::$BOM) as $bom_string) {
            $bom_string = (string) $bom_string;
            if ($bom_string === '') {
                continue;
            }

            // Compare the prefix only: "strpos() === 0" would scan the whole string
            // whenever the BOM is not at the start.
            if (\strncmp($str, $bom_string, \strlen($bom_string)) === 0) {
                return true;
            }
        }

        return false;
    }
9850
9851
    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * INFO: The UTF-8 cleaning via "UTF8::clean()" only happens if $clean_utf8 is true,
     *       the tags are removed by the native "strip_tags()" function.
     *
     * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string      $str            <p>
     *                                    The input string.
     *                                    </p>
     * @param string|null $allowable_tags [optional] <p>
     *                                    You can use the optional second parameter to specify tags which should
     *                                    not be stripped.
     *                                    </p>
     *                                    <p>
     *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
     *                                    can not be changed with allowable_tags.
     *                                    </p>
     * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The stripped string.</p>
     */
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        $input = $clean_utf8 ? self::clean($str) : $str;

        // do not pass "null" as the second argument to the native function
        return $allowable_tags === null
            ? \strip_tags($input)
            : \strip_tags($input, $allowable_tags);
    }
9895
9896
    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * INFO: Everything that matches "[[:space:]]" in a unicode-aware ("u" modifier) regex
     *       is removed. If the regex call fails, an empty string is returned.
     *
     * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }
9917
9918
    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * INFO: Used implementations, in this order: "mb_stripos()" (if mbstring is available),
     *       "grapheme_stripos()" (UTF-8, non-negative offset and intl only), native "stripos()"
     *       (ASCII-only input) and finally a case-folded search via "UTF8::strpos()".
     *
     * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param string $needle     <p>The string to find in haystack.</p>
     * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
     *                   haystack string,<br> or <strong>false</strong> if needle is not found
     *                   (also for an empty haystack or an empty needle)
     */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset);
            }

            return \mb_stripos(
                $haystack,
                $needle,
                $offset,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        // INFO: "grapheme_stripos()" can't handle other encodings and can't handle negative offset
        $intl_is_usable = $encoding === 'UTF-8' && $offset >= 0 && self::$SUPPORT['intl'] === true;
        if ($intl_is_usable) {
            $position = \grapheme_stripos($haystack, $needle, $offset);
            if ($position !== false) {
                return $position;
            }
            // "false" is not returned here: the other fallbacks below are still tried
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }
9999
10000
    /**
10001
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
10002
     *
10003
     * EXAMPLE: <code>
10004
     * $str = 'iñtërnâtiônàlizætiøn';
10005
     * $search = 'NÂT';
10006
     *
10007
     * UTF8::stristr($str, $search); // 'nâtiônàlizætiøn'
10008
     * UTF8::stristr($str, $search, true); // 'iñtër'
10009
     * </code>
10010
     *
10011
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
10012
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
10013
     * @param bool   $before_needle [optional] <p>
10014
     *                              If <b>TRUE</b>, it returns the part of the
10015
     *                              haystack before the first occurrence of the needle (excluding the needle).
10016
     *                              </p>
10017
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10018
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10019
     *
10020
     * @psalm-pure
10021
     *
10022
     * @return false|string
10023
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
10024
     */
10025
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // Cleaning may have emptied the needle. Compare against '' explicitly:
        // a loose "!$needle" check would also treat the valid needle "0" as
        // empty and wrongly hand back the whole haystack.
        if ($needle === '') {
            return $haystack;
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return \mb_stristr($haystack, $needle, $before_needle, $encoding);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture everything in front of the first
        // case-insensitive occurrence of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // Skip as many characters as the captured prefix is long.
        return self::substr($haystack, (int) self::strlen($match[1], $encoding), null, $encoding);
    }
10097
10098
    /**
10099
     * Get the string length, not the byte-length!
10100
     *
10101
     * INFO: use UTF8::strwidth() for the char-length
10102
     *
10103
     * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn"); // 20</code>
10104
     *
10105
     * @see http://php.net/manual/en/function.mb-strlen.php
10106
     *
10107
     * @param string $str        <p>The string being checked for length.</p>
10108
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10109
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10110
     *
10111
     * @psalm-pure
10112
     *
10113
     * @return false|int
10114
     *                   <p>
10115
     *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
10116
     *                   $encoding.
10117
     *                   (One multi-byte character counted as +1).
10118
     *                   <br>
10119
     *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
10120
     *                   chars.
10121
     *                   </p>
10122
     */
10123
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
                return @\mb_strlen($str);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strlen($str);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $return_tmp = \iconv_strlen($str, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strlen($str);
            // Only an integer is a usable length: on failure the function
            // yields false or null, and in both cases the remaining
            // fallbacks below should still get their chance.
            if (\is_int($return_tmp)) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count every match of "any character"
        //

        \preg_match_all('/./us', $str, $parts);

        $return_tmp = \count($parts[0]);
        if ($return_tmp === 0) {
            // $str is not empty here, so zero matches means the regex engine
            // could not process the input.
            return false;
        }

        return $return_tmp;
    }
10228
10229
    /**
10230
     * Get string length in byte.
10231
     *
10232
     * @param string $str
10233
     *
10234
     * @psalm-pure
10235
     *
10236
     * @return int
10237
     */
10238
    public static function strlen_in_byte(string $str): int
    {
        // Nothing to measure for an empty string.
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so count with a
        // single-byte charset there; otherwise the native strlen() is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850') // 8-BIT
            : \strlen($str);
    }
10251
10252
    /**
10253
     * Case-insensitive string comparisons using a "natural order" algorithm.
10254
     *
10255
     * INFO: natural order version of UTF8::strcasecmp()
10256
     *
10257
     * EXAMPLES: <code>
10258
     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
10259
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10260
     *
10261
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10262
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10263
     * </code>
10264
     *
10265
     * @param string $str1     <p>The first string.</p>
10266
     * @param string $str2     <p>The second string.</p>
10267
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10268
     *
10269
     * @psalm-pure
10270
     *
10271
     * @return int
10272
     *             <strong>&lt; 0</strong> if str1 is less than str2<br>
10273
     *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
10274
     *             <strong>0</strong> if they are equal
10275
     */
10276
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // Case-fold both operands, then delegate to the case-sensitive
        // natural-order comparison.
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }
10283
10284
    /**
10285
     * String comparisons using a "natural order" algorithm
10286
     *
10287
     * INFO: natural order version of UTF8::strcmp()
10288
     *
10289
     * EXAMPLES: <code>
10290
     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
10291
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
10292
     *
10293
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
10294
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
10295
     * </code>
10296
     *
10297
     * @see http://php.net/manual/en/function.strnatcmp.php
10298
     *
10299
     * @param string $str1 <p>The first string.</p>
10300
     * @param string $str2 <p>The second string.</p>
10301
     *
10302
     * @psalm-pure
10303
     *
10304
     * @return int
10305
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
10306
     *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
10307
     *             <strong>0</strong> if they are equal
10308
     */
10309
    public static function strnatcmp(string $str1, string $str2): int
    {
        if ($str1 !== $str2) {
            // Fold both sides before handing them to the native comparison.
            $left = (string) self::strtonatfold($str1);
            $right = (string) self::strtonatfold($str2);

            return \strnatcmp($left, $right);
        }

        // Identical strings: no folding needed.
        return 0;
    }
10320
10321
    /**
10322
     * Case-insensitive string comparison of the first n characters.
10323
     *
10324
     * EXAMPLE: <code>
10325
     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
10326
     * </code>
10327
     *
10328
     * @see http://php.net/manual/en/function.strncasecmp.php
10329
     *
10330
     * @param string $str1     <p>The first string.</p>
10331
     * @param string $str2     <p>The second string.</p>
10332
     * @param int    $len      <p>The length of strings to be used in the comparison.</p>
10333
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10334
     *
10335
     * @psalm-pure
10336
     *
10337
     * @return int
10338
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10339
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10340
     *             <strong>0</strong> if they are equal
10341
     */
10342
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // Case-fold both operands with the caller's encoding ...
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // ... and pass the same encoding on, so that the first $len characters
        // are cut with the caller's charset instead of the UTF-8 default.
        return self::strncmp($folded1, $folded2, $len, $encoding);
    }
10354
10355
    /**
10356
     * String comparison of the first n characters.
10357
     *
10358
     * EXAMPLE: <code>
10359
     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
10360
     * </code>
10361
     *
10362
     * @see http://php.net/manual/en/function.strncmp.php
10363
     *
10364
     * @param string $str1     <p>The first string.</p>
10365
     * @param string $str2     <p>The second string.</p>
10366
     * @param int    $len      <p>Number of characters to use in the comparison.</p>
10367
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10368
     *
10369
     * @psalm-pure
10370
     *
10371
     * @return int
10372
     *             <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
10373
     *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
10374
     *             <strong>0</strong> if they are equal
10375
     */
10376
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // The two most common encodings are used as given; everything else
        // is normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($encoding !== 'UTF-8') {
            // Other encodings: cut via the library's own substr().
            return self::strcmp(
                (string) self::substr($str1, 0, $len, $encoding),
                (string) self::substr($str2, 0, $len, $encoding)
            );
        }

        // UTF-8: cut directly via mbstring.
        $head1 = (string) \mb_substr($str1, 0, $len);
        $head2 = (string) \mb_substr($str2, 0, $len);

        return self::strcmp($head1, $head2);
    }
10396
10397
    /**
10398
     * Search a string for any of a set of characters.
10399
     *
10400
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
10401
     *
10402
     * @see http://php.net/manual/en/function.strpbrk.php
10403
     *
10404
     * @param string $haystack  <p>The string where char_list is looked for.</p>
10405
     * @param string $char_list <p>This parameter is case-sensitive.</p>
10406
     *
10407
     * @psalm-pure
10408
     *
10409
     * @return false|string
10410
     *                      <p>The string starting from the character found, or false if it is not found.</p>
10411
     */
10412
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($char_list === '' || $haystack === '') {
            return false;
        }

        // Build a character class from the list and look for its first hit.
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $m)) {
            return false;
        }

        // Locate the matched character byte-wise and return the rest of the
        // haystack from there.
        $byte_pos = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $byte_pos);
    }
10424
10425
    /**
10426
     * Find the position of the first occurrence of a substring in a string.
10427
     *
10428
     * INFO: use UTF8::strpos_in_byte() for the byte-length
10429
     *
10430
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
10431
     *
10432
     * @see http://php.net/manual/en/function.mb-strpos.php
10433
     *
10434
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
10435
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
10436
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
10437
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10438
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10439
     *
10440
     * @psalm-pure
10441
     *
10442
     * @return false|int
10443
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
10444
     *                   string.<br> If needle is not found it returns false.
10445
     */
10446
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //
        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Turn a negative offset into the equivalent position counted from the
        // start of the haystack. The search range stays the same, but the
        // returned index is now relative to the full haystack instead of the
        // cut-off tail.
        if ($offset < 0) {
            $offset += (int) self::strlen($haystack, $encoding);
            if ($offset < 0) {
                // the offset reaches back past the start: search everything
                $offset = 0;
            }
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // Byte-wise search in the remaining tail ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... then convert the byte position into a character count.
        if ($pos) {
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }
10584
10585
    /**
10586
     * Find the position of the first occurrence of a substring in a string.
10587
     *
10588
     * @param string $haystack <p>
10589
     *                         The string being checked.
10590
     *                         </p>
10591
     * @param string $needle   <p>
10592
     *                         The string to search for in the haystack.
10593
     *                         </p>
10594
     * @param int    $offset   [optional] <p>
10595
     *                         The search offset. If it is not specified, 0 is used.
10596
     *                         </p>
10597
     *
10598
     * @psalm-pure
10599
     *
10600
     * @return false|int
10601
     *                   <p>The numeric position of the first occurrence of needle in the
10602
     *                   haystack string. If needle is not found, it returns false.</p>
10603
     */
10604
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or needle can never produce a position.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so search with a
        // single-byte charset there; otherwise the native strpos() is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \strpos($haystack, $needle, $offset);
    }
10617
10618
    /**
10619
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
10620
     *
10621
     * @param string $haystack <p>
10622
     *                         The string being checked.
10623
     *                         </p>
10624
     * @param string $needle   <p>
10625
     *                         The string to search for in the haystack (case-insensitive).
10626
     *                         </p>
10627
     * @param int    $offset   [optional] <p>
10628
     *                         The search offset. If it is not specified, 0 is used.
10629
     *                         </p>
10630
     *
10631
     * @psalm-pure
10632
     *
10633
     * @return false|int
10634
     *                   <p>The numeric position of the first occurrence of needle in the
10635
     *                   haystack string. If needle is not found, it returns false.</p>
10636
     */
10637
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // An empty haystack or needle can never produce a position.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so search with a
        // single-byte charset there; otherwise the native stripos() is used.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850') // 8-BIT
            : \stripos($haystack, $needle, $offset);
    }
10650
10651
    /**
10652
     * Find the last occurrence of a character in a string within another.
10653
     *
10654
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
10655
     *
10656
     * @see http://php.net/manual/en/function.mb-strrchr.php
10657
     *
10658
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10659
     * @param string $needle        <p>The string to find in haystack</p>
10660
     * @param bool   $before_needle [optional] <p>
10661
     *                              Determines which portion of haystack
10662
     *                              this function returns.
10663
     *                              If set to true, it returns all of haystack
10664
     *                              from the beginning to the last occurrence of needle.
10665
     *                              If set to false, it returns all of haystack
10666
     *                              from the last occurrence of needle to the end,
10667
     *                              </p>
10668
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10669
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10670
     *
10671
     * @psalm-pure
10672
     *
10673
     * @return false|string
10674
     *                      <p>The portion of haystack or false if needle is not found.</p>
10675
     */
10676
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - this is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv, or via vanilla php when iconv is missing
        //

        // Both fallbacks search for the first character of the needle only.
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        // The two fallbacks differ only in how the last position is found.
        if (self::$SUPPORT['iconv'] === true) {
            $pos = \iconv_strrpos($haystack, $needle, $encoding);
        } else {
            $pos = self::strrpos($haystack, $needle, 0, $encoding);
        }

        if ($pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $pos, $encoding)
            : self::substr($haystack, $pos, null, $encoding);
    }
10781
10782
    /**
10783
     * Reverses characters order in the string.
10784
     *
10785
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
10786
     *
10787
     * @param string $str      <p>The input string.</p>
10788
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
10789
     *
10790
     * @psalm-pure
10791
     *
10792
     * @return string
10793
     *                <p>The string with characters in the reverse sequence.</p>
10794
     */
10795
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // The string is passed through emoji_encode() before reversing and
        // through emoji_decode() afterwards.
        $str = self::emoji_encode($str, true);

        $result = '';

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            // Walk from the last character to the first.
            $length = (int) self::strlen($str, $encoding);
            for ($index = $length - 1; $index >= 0; --$index) {
                $char = self::substr($str, $index, 1, $encoding);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            $length = (int) \grapheme_strlen($str);
            for ($index = $length - 1; $index >= 0; --$index) {
                $char = \grapheme_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        } else {
            $length = (int) \mb_strlen($str);
            for ($index = $length - 1; $index >= 0; --$index) {
                $char = \mb_substr($str, $index, 1);
                if ($char !== false) {
                    $result .= $char;
                }
            }
        }

        return self::emoji_decode($result, true);
    }
10839
10840
    /**
10841
     * Find the last occurrence of a character in a string within another, case-insensitive.
10842
     *
10843
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
10844
     *
10845
     * @see http://php.net/manual/en/function.mb-strrichr.php
10846
     *
10847
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
10848
     * @param string $needle        <p>The string to find in haystack.</p>
10849
     * @param bool   $before_needle [optional] <p>
10850
     *                              Determines which portion of haystack
10851
     *                              this function returns.
10852
     *                              If set to true, it returns all of haystack
10853
     *                              from the beginning to the last occurrence of needle.
10854
     *                              If set to false, it returns all of haystack
10855
     *                              from the last occurrence of needle to the end,
10856
     *                              </p>
10857
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
10858
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
10859
     *
10860
     * @psalm-pure
10861
     *
10862
     * @return false|string
10863
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
10864
     */
10865
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Nothing to search in / search for.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle.
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // For UTF-8 the call is made without an explicit encoding argument.
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // Only the first character of the needle is used for the lookup here.
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $last_pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }
10920
10921
    /**
10922
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
10923
     *
10924
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
10925
     *
10926
     * @param string     $haystack   <p>The string to look in.</p>
10927
     * @param int|string $needle     <p>The string to look for.</p>
10928
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
10929
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
10930
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
10931
     *
10932
     * @psalm-pure
10933
     *
10934
     * @return false|int
10935
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
10936
     *                   string.<br>If needle is not found, it returns false.</p>
10937
     */
10938
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support an integer $needle,
        // so a non-negative int is converted via self::chr() first.
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // For UTF-8 the call is made without an explicit encoding argument.
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // "grapheme_strripos()" can handle neither other encodings nor a negative offset.
        if ($offset >= 0 && $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
            // A "false" result is not final: the remaining fallbacks still get a try.
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Case-fold both sides, then delegate to the case-sensitive search.
        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }
11038
11039
    /**
11040
     * Finds position of last occurrence of a string within another, case-insensitive.
11041
     *
11042
     * @param string $haystack <p>
11043
     *                         The string from which to get the position of the last occurrence
11044
     *                         of needle.
11045
     *                         </p>
11046
     * @param string $needle   <p>
11047
     *                         The string to find in haystack.
11048
     *                         </p>
11049
     * @param int    $offset   [optional] <p>
11050
     *                         The position in haystack
11051
     *                         to start searching.
11052
     *                         </p>
11053
     *
11054
     * @psalm-pure
11055
     *
11056
     * @return false|int
11057
     *                   <p>Return the numeric position of the last occurrence of needle in the
11058
     *                   haystack string, or false if needle is not found.</p>
11059
     */
11060
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Empty input on either side can never match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with the
        // 8-bit "CP850" charset; otherwise the plain function is called.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }
11073
11074
    /**
11075
     * Find the position of the last occurrence of a substring in a string.
11076
     *
11077
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
11078
     *
11079
     * @see http://php.net/manual/en/function.mb-strrpos.php
11080
     *
11081
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
11082
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
11083
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
11084
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
11085
     *                               the end of the string.
11086
     *                               </p>
11087
     * @param string     $encoding   [optional] <p>Set the charset.</p>
11088
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11089
     *
11090
     * @psalm-pure
11091
     *
11092
     * @return false|int
11093
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
11094
     *                   string.<br>If needle is not found, it returns false.</p>
11095
     */
11096
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            return false;
        }

        // iconv and mbstring do not support an integer $needle,
        // so a non-negative int is converted via self::chr() first.
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos are not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // For UTF-8 the call is made without an explicit encoding argument.
            return $encoding === 'UTF-8'
                ? \mb_strrpos($haystack, $needle, $offset)
                : \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strrpos($haystack, $needle, $offset);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // "grapheme_strrpos()" can handle neither other encodings nor a negative offset.
        if ($offset >= 0 && $encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_pos = \grapheme_strrpos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
            // A "false" result is not final: the remaining fallbacks still get a try.
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Narrow the haystack according to the offset:
        //   > 0 : drop the leading characters (added back to the result below)
        //   < 0 : drop the trailing characters, positions then count from 0
        $sliced = null;
        if ($offset !== 0) {
            if ($offset > 0) {
                $sliced = self::substr($haystack, $offset);
            } else {
                $sliced = self::substr($haystack, 0, $offset);
                $offset = 0;
            }
        }

        if ($sliced !== null) {
            $haystack = $sliced === false ? '' : (string) $sliced;
        }

        // Byte position of the last match ...
        $byte_pos = \strrpos($haystack, $needle);
        if ($byte_pos === false) {
            return false;
        }

        // ... turned into a character position by measuring everything in front of it.
        /** @var false|string $prefix - needed for PhpStan (stubs error) */
        $prefix = \substr($haystack, 0, $byte_pos);
        if ($prefix === false) {
            return false;
        }

        return $offset + (int) self::strlen($prefix);
    }
11219
11220
    /**
11221
     * Find the position of the last occurrence of a substring in a string.
11222
     *
11223
     * @param string $haystack <p>
11224
     *                         The string being checked, for the last occurrence
11225
     *                         of needle.
11226
     *                         </p>
11227
     * @param string $needle   <p>
11228
     *                         The string to find in haystack.
11229
     *                         </p>
11230
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
11231
     *                         the string. Negative values will stop searching at an arbitrary point
11232
     *                         prior to the end of the string.
11233
     *                         </p>
11234
     *
11235
     * @psalm-pure
11236
     *
11237
     * @return false|int
11238
     *                   <p>The numeric position of the last occurrence of needle in the
11239
     *                   haystack string. If needle is not found, it returns false.</p>
11240
     */
11241
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // Empty input on either side can never match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with the
        // 8-bit "CP850" charset; otherwise the plain function is called.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }
11254
11255
    /**
11256
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
11257
     * mask.
11258
     *
11259
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
11260
     *
11261
     * @param string   $str      <p>The input string.</p>
11262
     * @param string   $mask     <p>The mask of chars</p>
11263
     * @param int      $offset   [optional]
11264
     * @param int|null $length   [optional]
11265
     * @param string   $encoding [optional] <p>Set the charset.</p>
11266
     *
11267
     * @psalm-pure
11268
     *
11269
     * @return false|int
11270
     */
11271
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Cut the string down to the requested window before matching.
        if ($offset !== 0 || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length !== null) {
                $str = (string) \mb_substr($str, $offset, $length);
            } else {
                $str = (string) \mb_substr($str, $offset);
            }
        }

        if ($mask === '' || $str === '') {
            return 0;
        }

        // Match the leading run of mask characters; its length is the result.
        $found = [];
        if (!\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
            return 0;
        }

        return (int) self::strlen($found[0], $encoding);
    }
11302
11303
    /**
11304
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
11305
     *
11306
     * EXAMPLE: <code>
11307
     * $str = 'iñtërnâtiônàlizætiøn';
11308
     * $search = 'nât';
11309
     *
11310
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
11311
     * UTF8::strstr($str, $search, true)); // 'iñtër'
11312
     * </code>
11313
     *
11314
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
11315
     * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
11316
     * @param bool   $before_needle [optional] <p>
11317
     *                              If <b>TRUE</b>, strstr() returns the part of the
11318
     *                              haystack before the first occurrence of the needle (excluding the needle).
11319
     *                              </p>
11320
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
11321
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
11322
     *
11323
     * @psalm-pure
11324
     *
11325
     * @return false|string
11326
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
11327
     */
11328
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Nothing to search in / search for.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" return a wrong position
            // if invalid characters are found in $haystack before $needle.
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred path: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            // For UTF-8 the call is made without an explicit encoding argument.
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        // "grapheme_strstr()" can't handle other encodings.
        if ($encoding === 'UTF-8' && self::$SUPPORT['intl'] === true) {
            $grapheme_part = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_part !== false) {
                return $grapheme_part;
            }
            // A "false" result is not final: the remaining fallbacks still get a try.
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // Lazily capture everything in front of the first occurrence of the needle.
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // The length of the captured prefix is where the needle starts.
        $prefix_length = (int) self::strlen($match[1]);

        return self::substr($haystack, $prefix_length);
    }
11424
11425
    /**
11426
     * Finds first occurrence of a string within another.
11427
     *
11428
     * @param string $haystack      <p>
11429
     *                              The string from which to get the first occurrence
11430
     *                              of needle.
11431
     *                              </p>
11432
     * @param string $needle        <p>
11433
     *                              The string to find in haystack.
11434
     *                              </p>
11435
     * @param bool   $before_needle [optional] <p>
11436
     *                              Determines which portion of haystack
11437
     *                              this function returns.
11438
     *                              If set to true, it returns all of haystack
11439
     *                              from the beginning to the first occurrence of needle.
11440
     *                              If set to false, it returns all of haystack
11441
     *                              from the first occurrence of needle to the end,
11442
     *                              </p>
11443
     *
11444
     * @psalm-pure
11445
     *
11446
     * @return false|string
11447
     *                      <p>The portion of haystack,
11448
     *                      or false if needle is not found.</p>
11449
     */
11450
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // Empty input on either side can never match.
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it with the
        // 8-bit "CP850" charset; otherwise the plain function is called.
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }
11466
11467
    /**
11468
     * Unicode transformation for case-less matching.
11469
     *
11470
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
11471
     *
11472
     * @see http://unicode.org/reports/tr21/tr21-5.html
11473
     *
11474
     * @param string      $str        <p>The input string.</p>
11475
     * @param bool        $full       [optional] <p>
11476
     *                                <b>true</b>, replace full case folding chars (default)<br>
11477
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
11478
     *                                </p>
11479
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
11480
     * @param string      $encoding   [optional] <p>Set the charset.</p>
11481
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
11482
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string. PS: uppercase
11483
     *                                is for some languages better ...</p>
11484
     *
11485
     * @psalm-pure
11486
     *
11487
     * @return string
11488
     */
11489
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // Remove non UTF-8 chars before any case conversion takes place.
            $str = self::clean($str);
        }

        // Pre-process the string via the shared case helper
        // (direction and "full" mode are handed through).
        $prepared = self::fixStrCaseHelper($str, $lower, $full);

        // A language or a non-UTF-8 charset needs the full-featured converters.
        if ($lang !== null || $encoding !== 'UTF-8') {
            return $lower
                ? self::strtolower($prepared, $encoding, false, $lang)
                : self::strtoupper($prepared, $encoding, false, $lang);
        }

        // Plain UTF-8 without language specifics: call "mb_" directly.
        return $lower ? \mb_strtolower($prepared) : \mb_strtoupper($prepared);
    }
11523
11524
    /**
11525
     * Make a string lowercase.
11526
     *
11527
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
11528
     *
11529
     * @see http://php.net/manual/en/function.mb-strtolower.php
11530
     *
11531
     * @param string      $str                           <p>The string being lowercased.</p>
11532
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
11533
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
11534
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
11535
     *                                                   tr</p>
11536
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
11537
     *                                                   -> ß</p>
11538
     *
11539
     * @psalm-pure
11540
     *
11541
     * @return string
11542
     *                <p>String with all alphabetic characters converted to lowercase.</p>
11543
     */
11544
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // Remove non UTF-8 chars before the case conversion takes place.
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // Fast path: plain UTF-8 without language specifics.
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // The list of transliterator ids is loaded once, on first use.
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Lower';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // Unknown language: use the generic lowercase transliterator instead.
                    $language_code = 'Any-Lower';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                $transliterated = \transliterator_transliterate($language_code, $str);

                // transliterator_transliterate() reports failure with "false". Casting that
                // to string would silently return '' and drop the input, so only a real
                // result is returned here; on failure the "mb_" fallback below is used.
                if ($transliterated !== false) {
                    return $transliterated;
                }
            } else {
                /**
                 * @psalm-suppress ImpureFunctionCall - it is only a warning
                 */
                \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);
            }
        }

        // always fallback via "mb_strtolower()" (native or via symfony polyfill)
        return \mb_strtolower($str, $encoding);
    }
11605
11606
    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before the case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: no language specific rules and no encoding normalization needed
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // the list of transliterator ids is loaded lazily and then reused
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the language independent transliterator
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }
11687
11688
    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to.</p>
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into replacement pairs
     *
     * @psalm-pure
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // replacing something with itself is a no-op
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            $count_from = \count($from);
            $count_to = \count($to);

            // cut the longer list down, surplus entries have no counterpart
            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep the result in its own variable, so that the error message
            // below can still show the original "from" values
            $pairs = \array_combine($from, $to);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            return \strtr($str, $pairs);
        }

        // from here on $to is always the empty string

        if (\is_string($from)) {
            // remove every occurrence of $from
            return \str_replace($from, '', $str);
        }

        // $from is a list of replacement pairs
        return \strtr($str, $from);
    }
11755
11756
    /**
     * Return the width of a string.
     *
     * INFO: use UTF8::strlen() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn"); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        // the two most common encodings are used as they are
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // preferred way: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the chars matched by the pattern and count them,
        // each of them is counted with a width of two
        $wide_chars = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_chars);

        return ($wide_chars * 2) + (int) self::strlen($remaining);
    }
11815
11816
    /**
     * Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters.</p>
     *                      <p><b>FALSE</b> is returned if the length of the string
     *                      could not be determined in the fallback path.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // nothing to extract
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred way: mbstring + UTF-8
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            return $length === null
                ? \mb_substr($str, $offset)
                : \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return $length === null
                ? \substr($str, $offset)
                : \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset !== 0 || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // the offset points exactly behind the last char and no length was given
        // ($length can not be 0 here, that case was already handled above)
        if ($offset === $str_length && $length === null) {
            return '';
        }

        // impossible: the offset is behind the end of the string
        if ($offset !== 0 && $offset > $str_length) {
            return '';
        }

        if ($length === null) {
            $length = (int) $str_length;
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $intl_result = \grapheme_substr($str, $offset, $length);
            if ($intl_result !== false) {
                return $intl_result;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $iconv_result = \iconv_substr($str, $offset, $length);
            if ($iconv_result !== false) {
                return $iconv_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $chars = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($chars, $offset, $length));
    }
11975
11976
    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // only cut the strings if a sub-range was requested
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // compare at most as many chars of $str2 as were taken from $str1
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // measure $str1 in the same encoding that was used to cut it,
                // otherwise the length would be calculated as if it was UTF-8
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1, $encoding), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }
12037
12038
    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring. It outputs a warning if the offset plus the length is
     *                             greater than the haystack length.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>This functions returns an integer or false if there isn't a string.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // an empty haystack or needle is reported as "false", not as zero
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($length === 0) {
            return 0;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested range
        if ($offset !== 0 || ($length !== null && $length > 0)) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }

                $length = (int) $haystack_length;
            }

            $haystack = (string) (
                $encoding === 'UTF-8'
                    ? \mb_substr($haystack, $offset, $length)
                    : \mb_substr($haystack, $offset, $length, $encoding)
            );
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // without mbstring: count the matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }
12127
12128
    /**
     * Count the number of substring occurrences, processed byte-wise.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // with "mbstring.func_overload" the requested range is cut out manually
        if (
            ($offset !== 0 || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack);
                if ($haystack_length === false) {
                    return false;
                }

                $length = (int) $haystack_length;
            }

            if (
                $length !== 0
                &&
                $offset !== 0
                &&
                ($length + $offset) <= 0
                &&
                !Bootup::is_php('7.1') // output from "substr_count()" have changed in PHP 7.1
            ) {
                return false;
            }

            /** @var false|string $haystack_part - needed for PhpStan (stubs error) */
            $haystack_part = \substr($haystack, $offset, $length);
            $haystack = $haystack_part === false ? '' : (string) $haystack_part;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        return $length === null
            ? \substr_count($haystack, $needle, $offset)
            : \substr_count($haystack, $needle, $offset, $length);
    }
12209
12210
    /**
     * Returns the number of occurrences of $substring in the given string.
     * By default, the comparison is case-sensitive, but can be made insensitive
     * by setting $case_sensitive to false.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if (!$case_sensitive) {
                // bring both strings to the same case before counting
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$case_sensitive) {
            // other encodings are compared via case folding
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }
12257
12258
    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Return the sub-string.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // nothing to remove from an empty string, nothing to remove for an empty prefix
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many chars as the prefix is long
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12290
12291
    /**
     * Get part of a string process in bytes.
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters, as returned by "substr()" or by
     *                      "mb_substr()" with the 8-bit "CP850" charset.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to extract
        if ($str === '' || $length === 0) {
            return '';
        }

        // whole string
        if ($offset === 0 && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // no length given: use a large constant as upper bound
        $byte_length = $length === null ? 2147483647 : $length;

        return \substr($str, $offset, $byte_length);
    }
12324
12325
    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Return the sub-string.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // nothing to remove from an empty string, nothing to remove for an empty suffix
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $haystack_length = (int) self::strlen($haystack);
        $suffix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $haystack_length - $suffix_length);
    }
12357
12358
    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Return the sub-string.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // nothing to remove from an empty string, nothing to remove for an empty prefix
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many chars as the prefix is long
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }
12390
12391
    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code>
     *
     * source: https://gist.github.com/stemar/8287074
     *
     * @param string|string[] $str         <p>The input string or an array of strings.</p>
     * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
     * @param int|int[]       $offset      <p>
     *                                     If start is positive, the replacing will begin at the start'th offset
     *                                     into string.
     *                                     <br><br>
     *                                     If start is negative, the replacing will begin at the start'th character
     *                                     from the end of string.
     *                                     </p>
     * @param int|int[]|null  $length      [optional] <p>If given and is positive, it represents the length of the
     *                                     portion of string which is to be replaced. If it is negative, it
     *                                     represents the number of characters from the end of string at which to
     *                                     stop replacing. If it is not given and $str is a string, the replacing
     *                                     ends at the end of string. If it is not given and $str is an array, a
     *                                     length of zero is used, i.e. the replacement is inserted at the given
     *                                     start offset (see the example above).</p>
     * @param string          $encoding    [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $offset or $length is an array while $str is not an array
     *
     * @return string|string[]
     *                         <p>The result string is returned. If string is an array then array is returned.</p>
     */
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // "array_pad()" never shrinks below one element, so without this guard an empty
            // input array would be mapped to one bogus result element.
            if ($num === 0) {
                return [];
            }

            // the replacement
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset (non-integer entries fall back to 0)
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length
            if ($length === null) {
                // no length for array input means "insert" (length 0)
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    // NOTE(review): non-integer entries fall back to the number of input strings,
                    // not to a string length - kept as it is for backward compatibility.
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call, the encoding is handed over to every element as well
            return \array_map(
                [self::class, 'substr_replace'],
                $str,
                $replacement,
                $offset,
                $length,
                \array_fill(0, $num, $encoding)
            );
        }

        // a replacement array for a single string: only the first entry is used
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        // "array_map()" pads shorter arrays with null: treat a missing offset as 0,
        // instead of passing null into the strictly typed string functions below
        $offset = (int) $offset;

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end of the string, a missing length means "up to the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            // head + replacement + tail
            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into characters and splice the arrays
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }
12546
12547
    /**
     * Strips $needle from the end of $haystack, if (and only if) the haystack ends with it.
     *
     * EXAMPLE: <code>
     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     * </code>
     *
     * @param string $haystack <p>The string to strip the suffix from.</p>
     * @param string $needle   <p>The suffix to remove.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The haystack without the suffix, or the unchanged haystack if the suffix did not match.</p>
     */
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // An empty haystack has nothing to strip, and an empty needle strips nothing.
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // The suffix is compared byte by byte (case-sensitive).
        $has_suffix = \substr($haystack, -\strlen($needle)) === $needle;
        if (!$has_suffix) {
            return $haystack;
        }

        // UTF-8 goes straight to the "mb_" functions ...
        if ($encoding === 'UTF-8') {
            $keep_length = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

            return (string) \mb_substr($haystack, 0, $keep_length);
        }

        // ... every other encoding goes through the encoding-aware wrappers of this class.
        return (string) self::substr(
            $haystack,
            0,
            (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding),
            $encoding
        );
    }
12596
12597
    /**
     * Returns a case swapped version of the string.
     *
     * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Each character's case swapped.</p>
     */
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove invalid byte sequences first, so that the case mapping works on clean input
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8') {
            // byte-wise XOR trick: "lower ^ upper ^ original" yields the respective other case
            return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
        }

        $lower = \mb_strtolower($str);
        $upper = \mb_strtoupper($str);

        // The XOR trick needs all three strings to be aligned byte by byte. A string XOR is cut
        // to the shortest operand, so as soon as a case mapping changes the byte length
        // (e.g. "ı" -> "I") the result would be shifted and truncated.
        // NOTE(review): equal lengths do not prove alignment for every exotic mapping, but they
        // keep the previous (fast) behavior for all inputs where it was already usable.
        $byte_length = \strlen($str);
        if (\strlen($lower) === $byte_length && \strlen($upper) === $byte_length) {
            return (string) ($lower ^ $upper ^ $str);
        }

        // slow path: swap the case character by character
        $chars = \preg_split('//u', $str, -1, \PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            // e.g. invalid UTF-8 without "$clean_utf8": keep the previous behavior
            return (string) ($lower ^ $upper ^ $str);
        }

        $swapped = '';
        foreach ($chars as $char) {
            $lower_char = \mb_strtolower($char);

            // a character that changes when lowercased is an upper-case one, everything else
            // gets uppercased (which is a no-op for caseless characters)
            $swapped .= $lower_char !== $char ? $lower_char : \mb_strtoupper($char);
        }

        return $swapped;
    }
12629
12630
    /**
     * Checks whether symfony-polyfills are used.
     *
     * A polyfill is assumed as soon as "mb_strlen()" or "iconv()" exists although the matching
     * PHP extension ("mbstring" / "iconv") is not loaded.
     *
     * @psalm-pure
     *
     * @return bool
     *              <strong>true</strong> if in use, <strong>false</strong> otherwise
     */
    public static function symfony_polyfill_used(): bool
    {
        $mbstring_is_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_is_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_is_polyfilled || $iconv_is_polyfilled;
    }
12655
12656
    /**
     * Replaces every tab character in the string with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that replace one tab.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string without tab characters.</p>
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        $indent = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $indent, $str);
    }
12676
12677
    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with all characters of $str being title-cased.</p>
     */
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // remove invalid byte sequences before the case conversion
            $str = self::clean($str);
        }

        // language specific rules or the "keep the length" mode are handled by "str_titleize()"
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        // the plain case goes directly to "mb_convert_case()"
        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $normalized_encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $normalized_encoding);
    }
12731
12732
    /**
     * Deprecated camelCase alias, it only forwards to "UTF8::to_ascii()".
     *
     * @param string $str       <p>Handed over as first argument.</p>
     * @param string $subst_chr <p>Handed over as second argument.</p>
     * @param bool   $strict    <p>Handed over as third argument.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The result of "UTF8::to_ascii()".</p>
     *
     * @see        UTF8::to_ascii()
     * @deprecated <p>please use "UTF8::to_ascii()"</p>
     */
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        $ascii = self::to_ascii($str, $subst_chr, $strict);

        return $ascii;
    }
12753
12754
    /**
     * Deprecated camelCase alias, it only forwards to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>Handed over unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The result of "UTF8::to_iso8859()".</p>
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toIso8859($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12770
12771
    /**
     * Deprecated camelCase alias, it only forwards to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>Handed over unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The result of "UTF8::to_iso8859()".</p>
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function toLatin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12787
12788
    /**
     * Deprecated camelCase alias, it only forwards to "UTF8::to_utf8()".
     *
     * @param string|string[] $str <p>Handed over unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The result of "UTF8::to_utf8()".</p>
     *
     * @see        UTF8::to_utf8()
     * @deprecated <p>please use "UTF8::to_utf8()"</p>
     */
    public static function toUTF8($str)
    {
        $converted = self::to_utf8($str);

        return $converted;
    }
12804
12805
    /**
     * Convert a string into ASCII.
     *
     * The work is delegated to "ASCII::to_transliterate()".
     *
     * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
     *
     * @param string $str     <p>The input string.</p>
     * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
     * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
     *                        performance</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The transliterated string.</p>
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        $transliterated = ASCII::to_transliterate($str, $unknown, $strict);

        return $transliterated;
    }
12826
12827
    /**
     * Converts a bool-ish value ("true", "off", "1", 0, ...) into a real boolean.
     *
     * The known keywords are checked first (exact match, then lower-cased), numeric strings are
     * true if they are greater than zero and everything else is true if it is not only whitespace.
     *
     * @param bool|int|string $str
     *
     * @psalm-param bool|int|numeric-string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        $value = (string) $str;
        if ($value === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $keywords = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // exact match first, lower-cased match second
        $known = $keywords[$value] ?? $keywords[\strtolower($value)] ?? null;
        if ($known !== null) {
            return $known;
        }

        // numbers: only values above zero are true
        if (\is_numeric($value)) {
            return (float) $value > 0;
        }

        // everything else: truthiness of the trimmed string
        return (bool) \trim($value);
    }
12872
12873
    /**
     * Convert given string to safe filename (and keep string case).
     *
     * The work is delegated to "ASCII::to_filename()".
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        $filename = ASCII::to_filename($str, $use_transliterate, $fallback_char);

        return $filename;
    }
12896
12897
    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * Arrays are converted element by element (recursively), the keys are kept.
     *
     * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $input = (string) $str;

        return $input === '' ? '' : self::utf8_decode($input);
    }
12925
12926
    /**
     * Deprecated alias, it only forwards to "UTF8::to_iso8859()".
     *
     * @param string|string[] $str <p>Handed over unchanged.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The result of "UTF8::to_iso8859()".</p>
     *
     * @see        UTF8::to_iso8859()
     * @deprecated <p>please use "UTF8::to_iso8859()"</p>
     */
    public static function to_latin1($str)
    {
        $converted = self::to_iso8859($str);

        return $converted;
    }
12942
12943
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * A string is handed over to "UTF8::to_utf8_string()", for an array the same is done for
     * every element (the keys are kept).
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
     *
     * @param string|string[] $str                        <p>Any string or array of strings.</p>
     * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>The UTF-8 encoded string</p>
     *
     * @template TToUtf8
     * @psalm-param TToUtf8 $str
     * @psalm-return TToUtf8
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @psalm-var TToUtf8 $converted */
            $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $converted;
        }

        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        return $str;
    }
12984
12985
    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     * <ul>
     * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
     * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
     * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
     * case.</li>
     * </ul>
     *
     * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
     *
     * @param string $str                        <p>Any string.</p>
     * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The UTF-8 encoded string</p>
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        $byte_count = \strlen($str);
        $result = '';
        $pos = 0;

        while ($pos < $byte_count) {
            $lead = $str[$pos];

            // 0x00-0x7F: 7-bit byte, it doesn't need conversion
            if ($lead < "\x80") {
                $result .= $lead;
                ++$pos;

                continue;
            }

            // number of continuation bytes the lead byte announces,
            // 0 === stray continuation byte (0x80-0xBF) or unusable lead byte (0xF8-0xFF)
            if ($lead >= "\xC0" && $lead <= "\xDF") {
                $tail_count = 1;
            } elseif ($lead >= "\xE0" && $lead <= "\xEF") {
                $tail_count = 2;
            } elseif ($lead >= "\xF0" && $lead <= "\xF7") {
                $tail_count = 3;
            } else {
                $tail_count = 0;
            }

            // every announced byte must exist and must be a continuation byte (0x80-0xBF),
            // a byte behind the end of the string counts as "\x00" and therefore as invalid
            $looks_like_utf8 = $tail_count > 0;
            for ($k = 1; $k <= $tail_count; ++$k) {
                $next = $str[$pos + $k] ?? "\x00";
                if ($next < "\x80" || $next > "\xBF") {
                    $looks_like_utf8 = false;

                    break;
                }
            }

            if ($looks_like_utf8) {
                // yeah, almost sure it's UTF8 already: copy the whole sequence
                $result .= \substr($str, $pos, $tail_count + 1);
                $pos += $tail_count + 1;
            } else {
                // not valid UTF8 - convert this single byte
                $result .= self::to_utf8_convert_helper($lead);
                ++$pos;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $result = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single escape sequence
                    $code_point = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: http://unicode.org/faq/utf_bom.html#utf16-4
                    $high = (int) \hexdec($matches[1]);
                    $low = (int) \hexdec($matches[2]);
                    $code_point = 0x10000 + (($high - 0xD800) << 10) + ($low - 0xDC00);
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($code_point < 0x80) {
                    return (string) self::chr($code_point);
                }

                if ($code_point < 0xA0) {
                    // 0x80-0x9F: built by hand as two bytes
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | ($code_point >> 6)) . (string) self::chr(0x80 | ($code_point & 0x3F));
                }

                return self::decimal_to_chr($code_point);
            },
            $result
        );

        // "preg_replace_callback()" returns null on error
        if ($result === null) {
            return '';
        }

        // optional: decode html-entities
        if ($decode_html_entity_to_utf8) {
            $result = self::html_entity_decode($result);
        }

        return $result;
    }
13118
13119
    /**
     * Casts a numeric string to an integer, every other string results in null.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return int|null
     *                  <p>null if the string isn't numeric</p>
     */
    public static function to_int(string $str)
    {
        return \is_numeric($str) ? (int) $str : null;
    }
13137
13138
    /**
     * Returns the given input as string, or null if the input isn't int|float|string
     * and do not implement the "__toString()" method.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
     */
    public static function to_string($input)
    {
        // strings and numbers can be casted directly (booleans are not accepted on purpose)
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects need a "__toString()" method
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null, bool, array, resource or an object without "__toString()"
        return null;
    }
13183
13184
    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower then "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) cost more time, then we can safe here.
     *
     * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars [optional] <p>Optional characters to be stripped, null strips whitespace and an
     *                           empty string strips nothing (like the native "trim()").</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The trimmed string.</p>
     */
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list strips nothing. Without this guard the patterns below would
        // contain the malformed character class "[]".
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                // "mb_ereg_replace()" takes the pattern without delimiters, so no delimiter is quoted
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // fallback without mbstring: the same pattern via "regex_replace()"
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }
13230
13231
    /**
     * Uppercases the first character of a string and leaves the rest untouched.
     *
     * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The resulting string with an uppercase first char.</p>
     */
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // language rules or length preservation need the library's own "strtoupper()"
        $needs_custom_upper = $lang !== null || $try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $head = (string) \mb_substr($str, 0, 1);
            $tail = (string) \mb_substr($str, 1);

            if ($needs_custom_upper) {
                return self::strtoupper(
                    $head,
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                ) . $tail;
            }

            return \mb_strtoupper($head) . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');
        $tail = (string) self::substr($str, 1, null, $encoding);

        if ($needs_custom_upper) {
            $head = (string) self::substr($str, 0, 1, $encoding);

            return self::strtoupper(
                $head,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            ) . $tail;
        }

        $head = (string) \mb_substr($str, 0, 1, $encoding);

        return \mb_strtoupper($head, $encoding) . $tail;
    }
13307
13308
    /**
     * Alias for "UTF8::ucfirst()": forwards all three arguments unchanged.
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::ucfirst()
     * @deprecated <p>please use "UTF8::ucfirst()"</p>
     */
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        $result = self::ucfirst($str, $encoding, $clean_utf8);

        return $result;
    }
13329
13330
    /**
     * Uppercase the first character of every word in the string.
     *
     * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $exceptions [optional] <p>Words that are copied to the result unchanged.</p>
     * @param string   $char_list  [optional] <p>Additional chars that belong to words and do not start a new
     *                             word.</p>
     * @param string   $encoding   [optional] <p>Set the charset.</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // strict comparison: the string "0" is falsy in PHP but is still valid input
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // the native "ucwords()" is only usable without a custom char list or exceptions
        // (compared strictly, so that a "0" char list / exception is not ignored)
        $use_php_default_functions = $char_list === '' && \implode('', $exceptions) === '';

        if ($use_php_default_functions && ASCII::is_ascii($str)) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        foreach ($words as $word) {
            // skip empty chunks only; a chunk like "0" must be kept
            if ($word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                $words_str .= $word;
            } else {
                $words_str .= self::ucfirst($word, $encoding);
            }
        }

        return $words_str;
    }
13398
13399
    /**
     * Decode url-encoded strings and HTML entities (optionally repeatedly) and fix simple UTF-8 issues.
     *
     * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'Düsseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        // without any of these markers there is nothing to decode
        $needs_decoding = false;
        foreach (['&', '%', '+', '\u'] as $marker) {
            if (\strpos($str, $marker) !== false) {
                $needs_decoding = true;

                break;
            }
        }

        if (!$needs_decoding) {
            return self::fix_simple_utf8($str);
        }

        $str = self::urldecode_unicode_helper($str);

        // one decoding pass; repeated until the result is stable when $multi_decode is set
        do {
            $previous = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::fix_simple_utf8(
                \urldecode(
                    self::html_entity_decode(
                        self::to_utf8($str),
                        \ENT_QUOTES | \ENT_HTML5
                    )
                )
            );
        } while ($multi_decode && $previous !== $str);

        return $str;
    }
13474
13475
    /**
     * Return an array that maps url-encoded Windows-1252 bytes ("%20" - "%FF") to their UTF-8 characters.
     *
     * The keys are in ascending byte order and use upper-case hex digits.
     *
     * - "%20" - "%7F" map to the ASCII character with the same byte value.
     * - "%80" - "%9F" map to the Windows-1252 characters (e.g. "%80" => "€"); the five bytes that
     *   Windows-1252 leaves undefined (81, 8D, 8F, 90, 9D) map to the C1 control character with
     *   the same code point.
     * - "%A0" - "%FF" map to the Unicode character with the same code point (as in ISO-8859-1).
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @deprecated <p>please use the "UTF8::urldecode()" function to decode a string</p>
     */
    public static function urldecode_fix_win1252_chars(): array
    {
        // the printable characters where Windows-1252 differs from ISO-8859-1
        $win1252_chars = [
            0x80 => '€',
            0x82 => '‚',
            0x83 => 'ƒ',
            0x84 => '„',
            0x85 => '…',
            0x86 => '†',
            0x87 => '‡',
            0x88 => 'ˆ',
            0x89 => '‰',
            0x8A => 'Š',
            0x8B => '‹',
            0x8C => 'Œ',
            0x8E => 'Ž',
            0x91 => '‘',
            0x92 => '’',
            0x93 => '“',
            0x94 => '”',
            0x95 => '•',
            0x96 => '–',
            0x97 => '—',
            0x98 => '˜',
            0x99 => '™',
            0x9A => 'š',
            0x9B => '›',
            0x9C => 'œ',
            0x9E => 'ž',
            0x9F => 'Ÿ',
        ];

        $table = [];
        for ($byte = 0x20; $byte <= 0xFF; ++$byte) {
            $key = \sprintf('%%%02X', $byte);

            if ($byte < 0x80) {
                // ASCII: one byte, identical in UTF-8
                $table[$key] = \chr($byte);
            } elseif (isset($win1252_chars[$byte])) {
                $table[$key] = $win1252_chars[$byte];
            } elseif ($byte < 0xC0) {
                // U+0080 - U+00BF are encoded as 0xC2 followed by the byte itself
                $table[$key] = "\xC2" . \chr($byte);
            } else {
                // U+00C0 - U+00FF are encoded as 0xC3 followed by (byte - 0x40)
                $table[$key] = "\xC3" . \chr($byte - 0x40);
            }
        }

        return $table;
    }
13713
13714
    /**
     * Decodes a UTF-8 string to ISO-8859-1.
     *
     * Characters that do not fit into one byte (code point >= 256), as well as truncated
     * 2-byte sequences at the end of the input, are replaced by "?".
     *
     * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
     *
     * @param string $str             <p>The input string.</p>
     * @param bool   $keep_utf8_chars <p>If true, the untouched input is returned whenever the decoded
     *                                string is not shorter (in characters) than the input.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        $no_char_found = '?';
        // $i reads the input bytes, $j writes the decoded bytes in place ($j <= $i at all times)
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // the high nibble of the lead byte selects the sequence length
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // 2-byte sequence
                    if ($i + 1 >= $len) {
                        // truncated sequence: the continuation byte is missing
                        $str[$j] = $no_char_found;

                        break;
                    }

                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // 4-byte sequence: skip one byte more than the 3-byte case below
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or 4-byte) sequence: never representable in ISO-8859-1
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // every other byte is copied as it is
                    $str[$j] = $str[$i];
            }
        }

        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }
13790
13791
    /**
     * Encodes an ISO-8859-1 string to UTF-8 via the native "utf8_encode()" function.
     *
     * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The encoded string, or an empty string if the encoding failed.</p>
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \utf8_encode($str);

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        return $encoded === false ? '' : $encoded;
    }
13819
13820
    /**
     * Alias for "UTF8::fix_simple_utf8()": fix -> utf8-win1252 chars.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::fix_simple_utf8()
     * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p>
     */
    public static function utf8_fix_win1252_chars(string $str): string
    {
        $fixed = self::fix_simple_utf8($str);

        return $fixed;
    }
13835
13836
    /**
     * Returns the class-level table of utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        $table = self::$WHITESPACE_TABLE;

        return $table;
    }
13851
13852
    /**
     * Limit the number of words in a string.
     *
     * Words are runs of non-whitespace characters. If the string was shortened, the whitespace
     * (including Unicode whitespace) after the last kept word is removed before "$str_add_on"
     * is appended.
     *
     * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $limit      <p>The limit of words as integer.</p>
     * @param string $str_add_on <p>Replacement for the stripped string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>An empty string for an empty input or a limit below 1, the untouched input if it
     *                does not exceed the limit, otherwise the shortened string plus "$str_add_on".</p>
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($str === '' || $limit < 1) {
            return '';
        }

        // leading whitespace + up to $limit words, each with its trailing whitespace
        \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches);

        if (
            !isset($matches[0])
            ||
            \mb_strlen($str) === (int) \mb_strlen($matches[0])
        ) {
            // no match at all, or the whole string fits into the limit
            return $str;
        }

        // Trim with the same Unicode-aware "\s" that matched the whitespace above;
        // the byte-wise "rtrim()" would leave e.g. U+3000 or U+00A0 in place.
        return (string) \preg_replace('/\\s++\\z/u', '', $matches[0]) . $str_add_on;
    }
13886
13887
    /**
     * Wraps a string to a given number of characters.
     *
     * The wrapping positions are calculated by the native "wordwrap()" on a placeholder string
     * that has one single-byte symbol per character of the input, and are then applied to the
     * real characters.
     *
     * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code>
     *
     * @see http://php.net/manual/en/function.wordwrap.php
     *
     * @param string $str   <p>The input string.</p>
     * @param int    $width [optional] <p>The column width.</p>
     * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool   $cut   [optional] <p>
     *                      If the cut is set to true, the string is
     *                      always wrapped at or before the specified width. So if you have
     *                      a word that is larger than the given width, it is broken apart.
     *                      </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The given string wrapped at the specified column;
     *                an empty string if "$str" or "$break" is empty.</p>
     */
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        // split on breaks that are already part of the input
        $str_split = \explode($break, $str);
        if ($str_split === false) {
            return '';
        }

        // $charsArray: the real chunks (one entry per element of "self::str_split()", plus one entry per
        //              existing break)
        // $word_split: the placeholder string with exactly one byte per $charsArray entry:
        //              "#" for a break, " " for a space and "?" for everything else
        /** @var string[] $charsArray */
        $charsArray = [];
        $word_split = '';
        foreach ($str_split as $i => $i_value) {
            // every part after the first one was preceded by a break in the input
            if ($i) {
                $charsArray[] = $break;
                $word_split .= '#';
            }

            foreach (self::str_split($i_value) as $c) {
                $charsArray[] = $c;
                if ($c === ' ') {
                    $word_split .= ' ';
                } else {
                    $word_split .= '?';
                }
            }
        }

        $str_return = '';
        $j = 0; // index into $charsArray
        $b = -1; // position of the current "#" in $word_split
        $i = -1; // position in $word_split that was processed last
        // let the native function decide where to wrap, marking every wrap position with "#"
        $word_split = \wordwrap($word_split, $width, '#', $cut);

        $max = \mb_strlen($word_split);
        while (($b = \mb_strpos($word_split, '#', $b + 1)) !== false) {
            // copy the real characters that belong to the placeholders in front of this "#"
            for (++$i; $i < $b; ++$i) {
                if (isset($charsArray[$j])) {
                    $str_return .= $charsArray[$j];
                    unset($charsArray[$j]);
                }
                ++$j;

                // prevent endless loop, e.g. if there is an error in the "mb_*" polyfill
                if ($i > $max) {
                    break 2;
                }
            }

            // if the "#" stands for an existing break or a space, that entry is consumed
            // (it is replaced by the $break appended below)
            if (
                $break === $charsArray[$j]
                ||
                $charsArray[$j] === ' '
            ) {
                unset($charsArray[$j++]);
            }

            $str_return .= $break;

            // prevent endless loop, e.g. if there is an error in the "mb_*" polyfill
            if ($b > $max) {
                break;
            }
        }

        // append whatever is left after the last wrap position
        return $str_return . \implode('', $charsArray);
    }
13981
13982
    /**
     * Splits the string by "$delimiter" (by default: any newline), wraps every part on its own
     * via "UTF8::wordwrap()" and joins the parts again.
     *
     * @param string      $str             <p>The input string.</p>
     * @param int         $width           [optional] <p>The column width.</p>
     * @param string      $break           [optional] <p>The line is broken using the optional break parameter.</p>
     * @param bool        $cut             [optional] <p>
     *                                     If the cut is set to true, the string is
     *                                     always wrapped at or before the specified width. So if you have
     *                                     a word that is larger than the given width, it is broken apart.
     *                                     </p>
     * @param bool        $add_final_break [optional] <p>
     *                                     If this flag is true, then the method will add a $break at the end
     *                                     of the result string.
     *                                     </p>
     * @param string|null $delimiter       [optional] <p>
     *                                     You can change the default behavior, where we split the string by newline.
     *                                     The parts are joined with this delimiter again ("\n" if it is null).
     *                                     </p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        $glue = $delimiter ?? "\n";
        $result = \implode($glue, $wrapped_lines);

        return $add_final_break ? $result . $break : $result;
    }
14035
14036
    /**
     * Returns the class-level array of Unicode White Space characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *                  <p>An array with numeric code point as key and White Space Character as value.</p>
     */
    public static function ws(): array
    {
        $whitespace = self::$WHITESPACE;

        return $whitespace;
    }
14048
14049
    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     * </code>
     *
     * When PCRE has UTF-8 support the check is delegated to a "/u" regex match;
     * otherwise the string is validated octet by octet with a small state machine.
     *
     * @see          http://hsivonen.iki.fi/php-utf8/
     *
     * @param string $str    <p>The string to be checked.</p>
     * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        // An empty string is trivially valid.
        if ($str === '') {
            return true;
        }

        // Strict mode: binary-looking input that is detected as UTF-16 / UTF-32 is rejected.
        if ($strict && self::is_binary($str, true)) {
            if (self::is_utf16($str, false) !== false) {
                return false;
            }

            if (self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // With the /u modifier PCRE validates the whole subject first: if the
            // UTF-8 is invalid anywhere, nothing matches at all, so matching just
            // the first character is enough to prove validity.
            return \preg_match('/^./us', $str) === 1;
        }

        // --- Fallback: manual octet-by-octet validation ---

        $pending_octets = 0;  // continuation octets still expected for the current sequence
        $code_point = 0;      // code point assembled so far
        $sequence_length = 1; // total number of octets of the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $byte_count = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($pos = 0; $pos < $byte_count; ++$pos) {
            $octet = self::$ORD[$str[$pos]];

            if ($pending_octets === 0) {
                // Start of a character: either US-ASCII or the lead octet of a
                // multi-octet sequence.
                if ((0x80 & $octet) === 0) {
                    // US-ASCII, pass straight through.
                    $sequence_length = 1;
                } elseif ((0xE0 & $octet) === 0xC0) {
                    // Lead octet of a 2 octet sequence.
                    $code_point = ($octet & 0x1F) << 6;
                    $pending_octets = 1;
                    $sequence_length = 2;
                } elseif ((0xF0 & $octet) === 0xE0) {
                    // Lead octet of a 3 octet sequence.
                    $code_point = ($octet & 0x0F) << 12;
                    $pending_octets = 2;
                    $sequence_length = 3;
                } elseif ((0xF8 & $octet) === 0xF0) {
                    // Lead octet of a 4 octet sequence.
                    $code_point = ($octet & 0x07) << 18;
                    $pending_octets = 3;
                    $sequence_length = 4;
                } elseif ((0xFC & $octet) === 0xF8) {
                    // Lead octet of a 5 octet sequence. This is always illegal
                    // (either not the shortest form or outside 0-0x10FFFF), but
                    // instead of resynchronizing we read the whole sequence and
                    // let the checks at the end of the sequence reject it.
                    $code_point = ($octet & 0x03) << 24;
                    $pending_octets = 4;
                    $sequence_length = 5;
                } elseif ((0xFE & $octet) === 0xFC) {
                    // Lead octet of a 6 octet sequence, handled like the 5 octet case.
                    $code_point = ($octet & 1) << 30;
                    $pending_octets = 5;
                    $sequence_length = 6;
                } else {
                    // Neither US-ASCII nor a legal lead octet.
                    return false;
                }

                continue;
            }

            // Inside a multi-octet sequence: only a continuation octet (10xxxxxx) is legal.
            if ((0xC0 & $octet) !== 0x80) {
                // Incomplete multi-octet sequence.
                return false;
            }

            // Merge the 6 payload bits at the position given by the remaining octets.
            $code_point |= ($octet & 0x0000003F) << (($pending_octets - 1) * 6);

            if (--$pending_octets !== 0) {
                continue;
            }

            // End of the sequence: $code_point now holds the decoded code point.

            // From Unicode 3.1, non-shortest form is illegal.
            $is_overlong = ($sequence_length === 2 && $code_point < 0x0080)
                           ||
                           ($sequence_length === 3 && $code_point < 0x0800)
                           ||
                           ($sequence_length === 4 && $code_point < 0x10000);
            // 5 and 6 octet sequences are never legal.
            $is_too_long = $sequence_length > 4;
            // From Unicode 3.2, surrogate characters are illegal.
            $is_surrogate = ($code_point & 0xFFFFF800) === 0xD800;
            // Code points outside the Unicode range are illegal.
            $is_out_of_range = $code_point > 0x10FFFF;

            if ($is_overlong || $is_too_long || $is_surrogate || $is_out_of_range) {
                return false;
            }

            // Reset the decoder state for the next character ($pending_octets is already 0).
            $code_point = 0;
            $sequence_length = 1;
        }

        // A trailing, unfinished sequence makes the string invalid.
        return $pending_octets === 0;
    }
14204
14205
    /**
     * Converts the case of a string via the class' case-folding replacement tables.
     *
     * The common table (self::$COMMON_CASE_FOLD) is always applied; the full table
     * (loaded once from the "caseFolding_full" data file) is applied in addition
     * when $use_full_case_fold is set.
     *
     * @param string $str
     * @param bool   $use_lowercase      <p>Use uppercase by default, otherwise use lowercase.</p>
     * @param bool   $use_full_case_fold <p>Convert not only common cases.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        // Pick the replacement direction once: upper -> lower, or lower -> upper.
        if ($use_lowercase) {
            $search = self::$COMMON_CASE_FOLD['upper'];
            $replace = self::$COMMON_CASE_FOLD['lower'];
        } else {
            $search = self::$COMMON_CASE_FOLD['lower'];
            $replace = self::$COMMON_CASE_FOLD['upper'];
        }

        $str = \str_replace($search, $replace, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * Lazily loaded full case-folding table, kept for the lifetime of the process.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // Index 0 is the search side when lower-casing, index 1 when upper-casing.
        $from_index = $use_lowercase ? 0 : 1;
        $to_index = $use_lowercase ? 1 : 0;

        return \str_replace($FULL_CASE_FOLD[$from_index], $FULL_CASE_FOLD[$to_index], $str);
    }
14258
14259
    /**
     * Get data from "/data/*.php".
     *
     * Includes the PHP file "<this directory>/data/<$file>.php" and returns
     * whatever that file returns.
     *
     * @param string $file <p>File name without the ".php" extension.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $data_file = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $data_file;
    }
14277
14278
    /**
     * Builds the emoji lookup caches on first use.
     *
     * Loads the "emoji" data file if needed, sorts it by key length (longest key
     * first) and fills the key, value and reversible-placeholder caches.
     *
     * @psalm-pure
     *
     * @return true|null
     *                   <p>true when the caches were built by this call, null when they already existed</p>
     */
    private static function initEmojiData()
    {
        // Already initialized: nothing to do.
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        /**
         * Order the table by descending key length.
         *
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // One placeholder per key, derived from the key's CRC32 and its reversed digits.
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . \strrev((string) $checksum) . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }
14313
14314
    /**
     * Checks whether mbstring "overloaded" is active on the server.
     *
     * Returns true only when the MB_OVERLOAD_STRING constant exists and its bit
     * is set in the "mbstring.func_overload" INI value.
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        // Without the constant there is nothing to test against.
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overload_flags = (int) @\ini_get('mbstring.func_overload');

        return ($overload_flags & \MB_OVERLOAD_STRING) !== 0;
    }
14335
14336
    /**
     * Filters a list of strings and returns the remaining values re-indexed from 0.
     *
     * @param array    $strings             <p>The strings to filter.</p>
     * @param bool     $remove_empty_values <p>Drop values that are empty after trim().</p>
     * @param int|null $remove_short_values <p>Drop values whose mb_strlen() is less than
     *                                      or equal to this number; null disables the check.</p>
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        // init
        $return = [];

        // Iterate by value: nothing is modified in place, so a reference would
        // only leave a dangling alias behind.
        foreach ($strings as $str) {
            if (
                $remove_short_values !== null
                &&
                \mb_strlen($str) <= $remove_short_values
            ) {
                continue;
            }

            if (
                $remove_empty_values
                &&
                \trim($str) === ''
            ) {
                continue;
            }

            $return[] = $str;
        }

        return $return;
    }
14377
14378
    /**
     * rxClass
     *
     * Builds a regex fragment that matches any one of the characters of $s
     * (plus the optional, already regex-ready $class). Results are memoized
     * per ($s, $class) combination.
     *
     * @param string $s     <p>The characters to match.</p>
     * @param string $class <p>Optional character-class content that is used as the starting point.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A bracketed character class, or a non-capturing alternation
     *                when some pieces could not be put into the class.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // Index 0 collects the character-class content; further entries are
        // alternatives that have to be matched as whole pieces.
        /** @var string[] $class_array */
        $class_array = [$class];

        // Iterate by value with a dedicated variable: the split result is a
        // temporary (nothing to reference) and the $s parameter stays untouched.
        foreach (self::str_split($s) as $char) {
            if ($char === '-') {
                // A literal "-" goes to the front of the class so it is not read as a range.
                $class_array[0] = '-' . $class_array[0];
            } elseif (!isset($char[2])) {
                // At most two bytes: quote it and add it to the class.
                $class_array[0] .= \preg_quote($char, '/');
            } elseif (self::strlen($char) === 1) {
                // Three or more bytes but a single character: add it unquoted.
                $class_array[0] .= $char;
            } else {
                // More than one character: keep it as a separate alternative.
                $class_array[] = $char;
            }
        }

        // NOTE: this is a truthiness check, so the class content "0" is left
        // without brackets just like an empty one.
        if ($class_array[0]) {
            $class_array[0] = '[' . $class_array[0] . ']';
        }

        if (\count($class_array) === 1) {
            $return = $class_array[0];
        } else {
            $return = '(?:' . \implode('|', $class_array) . ')';
        }

        $RX_CLASS_CACHE[$cache_key] = $return;

        return $return;
    }
14436
14437
    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * Splits $names at $delimiter, upper-cases the first character of every part
     * that is not a known name particle and does not start with a known prefix,
     * and joins the parts again with the same delimiter.
     *
     * @param string $names     <p>The name(s) to fix.</p>
     * @param string $delimiter <p>The separator between the name parts.</p>
     * @param string $encoding  <p>Encoding that is passed to the prefix lookups.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        // init
        $name_helper_array = \explode($delimiter, $names);
        if ($name_helper_array === false) {
            return '';
        }

        $special_cases = [
            'names' => [
                'ab',
                'af',
                'al',
                'and',
                'ap',
                'bint',
                'binte',
                'da',
                'de',
                'del',
                'den',
                'der',
                'di',
                'dit',
                'ibn',
                'la',
                'mac',
                'nic',
                'of',
                'ter',
                'the',
                'und',
                'van',
                'von',
                'y',
                'zu',
            ],
            'prefixes' => [
                'al-',
                "d'",
                'ff',
                "l'",
                'mac',
                'mc',
                'nic',
            ],
        ];

        foreach ($name_helper_array as &$name) {
            // Particles like "von" or "de" stay exactly as they are.
            if (\in_array($name, $special_cases['names'], true)) {
                continue;
            }

            $continue = false;

            // For hyphenated names, a part that merely starts with a particle is skipped as well.
            if ($delimiter === '-') {
                foreach ($special_cases['names'] as $beginning) {
                    if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                        $continue = true;

                        break;
                    }
                }
            }

            // Parts starting with a known prefix ("mc", "d'", ...) are not touched.
            foreach ($special_cases['prefixes'] as $beginning) {
                if (self::strpos($name, $beginning, 0, $encoding) === 0) {
                    $continue = true;

                    break;
                }
            }

            if ($continue) {
                continue;
            }

            $name = self::ucfirst($name);
        }
        // Break the reference so later use of $name cannot alter the last element.
        unset($name);

        return \implode($delimiter, $name_helper_array);
    }
14537
14538
    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * Normalizes the string to NFD and then removes every run of non-spacing
     * marks (\p{Mn}).
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string|null
     */
    private static function strtonatfold(string $str)
    {
        /** @noinspection PhpUndefinedClassInspection */
        $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

        return \preg_replace('/\p{Mn}+/u', '', $decomposed);
    }
14556
14557
    /**
     * Converts one input byte to UTF-8.
     *
     * Bytes listed in the Windows-1252 table are mapped via that table; every
     * other byte is encoded as a two-byte sequence (lead byte 0xC0 | high bits,
     * continuation byte 0x80 | low six bits).
     *
     * @param int|string $input <p>Used as key of the "ord" table, presumably a single byte.</p>
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        // Lazily load the lookup tables.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // The bitwise operators below are intentional: applied to two strings
            // they work on the bytes, they are not boolean "||" / "&&" typos.
            //
            // Truncate explicitly: "/" yields a float for non-multiples of 64 and a
            // fractional float as array key is deprecated since PHP 8.1.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }
14596
14597
    /**
     * Rewrites non-standard "%uXXXX" URL escapes (3 or 4 hex digits) into
     * HTML hexadecimal entities ("&#xXXXX;").
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The input with every "%uXXXX" replaced, or the unchanged input when there is none.</p>
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        $unicode_escape_rx = '/%u([0-9a-fA-F]{3,4})/';

        // Nothing to rewrite: hand the input back untouched.
        if (!\preg_match($unicode_escape_rx, $str)) {
            return $str;
        }

        return (string) \preg_replace($unicode_escape_rx, '&#x\\1;', $str);
    }
14615
}
14616